#Load libraries needed.
library(easypackages)
libraries("rgdal", "gdalUtils", "rgeos", "raster", "exactextractr", "maptools", "tigris", "ggplot2", "cowplot", "dplyr", "sp", "sf", "rgdal", "stringr", "RColorBrewer", "ggcorrplot", "pander", "forcats", "ggsn", "mapproj", "rlist", "lubridate", "ggpubr")#Reading in world countries shapefile.
data(wrld_simpl)
wrld_simpl@data[["NAME"]]## [1] Antigua and Barbuda
## [2] Algeria
## [3] Azerbaijan
## [4] Albania
## [5] Armenia
## [6] Angola
## [7] American Samoa
## [8] Argentina
## [9] Australia
## [10] Bahrain
## [11] Barbados
## [12] Bermuda
## [13] Bahamas
## [14] Bangladesh
## [15] Belize
## [16] Bosnia and Herzegovina
## [17] Bolivia
## [18] Burma
## [19] Benin
## [20] Solomon Islands
## [21] Brazil
## [22] Bulgaria
## [23] Brunei Darussalam
## [24] Canada
## [25] Cambodia
## [26] Sri Lanka
## [27] Congo
## [28] Democratic Republic of the Congo
## [29] Burundi
## [30] China
## [31] Afghanistan
## [32] Bhutan
## [33] Chile
## [34] Cayman Islands
## [35] Cameroon
## [36] Chad
## [37] Comoros
## [38] Colombia
## [39] Costa Rica
## [40] Central African Republic
## [41] Cuba
## [42] Cape Verde
## [43] Cook Islands
## [44] Cyprus
## [45] Denmark
## [46] Djibouti
## [47] Dominica
## [48] Dominican Republic
## [49] Ecuador
## [50] Egypt
## [51] Ireland
## [52] Equatorial Guinea
## [53] Estonia
## [54] Eritrea
## [55] El Salvador
## [56] Ethiopia
## [57] Austria
## [58] Czech Republic
## [59] French Guiana
## [60] Finland
## [61] Fiji
## [62] Falkland Islands (Malvinas)
## [63] Micronesia, Federated States of
## [64] French Polynesia
## [65] France
## [66] Gambia
## [67] Gabon
## [68] Georgia
## [69] Ghana
## [70] Grenada
## [71] Greenland
## [72] Germany
## [73] Guam
## [74] Greece
## [75] Guatemala
## [76] Guinea
## [77] Guyana
## [78] Haiti
## [79] Honduras
## [80] Croatia
## [81] Hungary
## [82] Iceland
## [83] India
## [84] Iran (Islamic Republic of)
## [85] Israel
## [86] Italy
## [87] Cote d'Ivoire
## [88] Iraq
## [89] Japan
## [90] Jamaica
## [91] Jordan
## [92] Kenya
## [93] Kyrgyzstan
## [94] Korea, Democratic People's Republic of
## [95] Kiribati
## [96] Korea, Republic of
## [97] Kuwait
## [98] Kazakhstan
## [99] Lao People's Democratic Republic
## [100] Lebanon
## [101] Latvia
## [102] Belarus
## [103] Lithuania
## [104] Liberia
## [105] Slovakia
## [106] Liechtenstein
## [107] Libyan Arab Jamahiriya
## [108] Madagascar
## [109] Martinique
## [110] Mongolia
## [111] Montserrat
## [112] The former Yugoslav Republic of Macedonia
## [113] Mali
## [114] Morocco
## [115] Mauritius
## [116] Mauritania
## [117] Malta
## [118] Oman
## [119] Maldives
## [120] Mexico
## [121] Malaysia
## [122] Mozambique
## [123] Malawi
## [124] New Caledonia
## [125] Niue
## [126] Niger
## [127] Aruba
## [128] Anguilla
## [129] Belgium
## [130] Hong Kong
## [131] Northern Mariana Islands
## [132] Faroe Islands
## [133] Andorra
## [134] Gibraltar
## [135] Isle of Man
## [136] Luxembourg
## [137] Macau
## [138] Monaco
## [139] Palestine
## [140] Montenegro
## [141] Mayotte
## [142] Aaland Islands
## [143] Norfolk Island
## [144] Cocos (Keeling) Islands
## [145] Antarctica
## [146] Bouvet Island
## [147] French Southern and Antarctic Lands
## [148] Heard Island and McDonald Islands
## [149] British Indian Ocean Territory
## [150] Christmas Island
## [151] United States Minor Outlying Islands
## [152] Vanuatu
## [153] Nigeria
## [154] Netherlands
## [155] Norway
## [156] Nepal
## [157] Nauru
## [158] Suriname
## [159] Nicaragua
## [160] New Zealand
## [161] Paraguay
## [162] Peru
## [163] Pakistan
## [164] Poland
## [165] Panama
## [166] Portugal
## [167] Papua New Guinea
## [168] Guinea-Bissau
## [169] Qatar
## [170] Reunion
## [171] Romania
## [172] Republic of Moldova
## [173] Philippines
## [174] Puerto Rico
## [175] Russia
## [176] Rwanda
## [177] Saudi Arabia
## [178] Saint Kitts and Nevis
## [179] Seychelles
## [180] South Africa
## [181] Lesotho
## [182] Botswana
## [183] Senegal
## [184] Slovenia
## [185] Sierra Leone
## [186] Singapore
## [187] Somalia
## [188] Spain
## [189] Saint Lucia
## [190] Sudan
## [191] Sweden
## [192] Syrian Arab Republic
## [193] Switzerland
## [194] Trinidad and Tobago
## [195] Thailand
## [196] Tajikistan
## [197] Tokelau
## [198] Tonga
## [199] Togo
## [200] Sao Tome and Principe
## [201] Tunisia
## [202] Turkey
## [203] Tuvalu
## [204] Turkmenistan
## [205] United Republic of Tanzania
## [206] Uganda
## [207] United Kingdom
## [208] Ukraine
## [209] United States
## [210] Burkina Faso
## [211] Uruguay
## [212] Uzbekistan
## [213] Saint Vincent and the Grenadines
## [214] Venezuela
## [215] British Virgin Islands
## [216] Viet Nam
## [217] United States Virgin Islands
## [218] Namibia
## [219] Wallis and Futuna Islands
## [220] Samoa
## [221] Swaziland
## [222] Yemen
## [223] Zambia
## [224] Zimbabwe
## [225] Indonesia
## [226] Guadeloupe
## [227] Netherlands Antilles
## [228] United Arab Emirates
## [229] Timor-Leste
## [230] Pitcairn Islands
## [231] Palau
## [232] Marshall Islands
## [233] Saint Pierre and Miquelon
## [234] Saint Helena
## [235] San Marino
## [236] Turks and Caicos Islands
## [237] Western Sahara
## [238] Serbia
## [239] Holy See (Vatican City)
## [240] Svalbard
## [241] Saint Martin
## [242] Saint Barthelemy
## [243] Guernsey
## [244] Jersey
## [245] South Georgia South Sandwich Islands
## [246] Taiwan
## 246 Levels: Aaland Islands Afghanistan Albania Algeria ... Zimbabwe
plot(wrld_simpl) #Reading in urban areas shapefile.
world.sf <- st_read("urban_shapefile/ne_10m_urban_areas_landscan.shp", crs = 4979) # Read shapefile as an sf object## Reading layer `ne_10m_urban_areas_landscan' from data source `/Users/alanaschreibman/COVID-19_NO2/urban_shapefile/ne_10m_urban_areas_landscan.shp' using driver `ESRI Shapefile'
## Simple feature collection with 6018 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -175.2333 ymin: -54.85 xmax: 178.5333 ymax: 77.49167
## Geodetic CRS: WGS 84
us.urban.sf <- st_read("us_urban_shapefile/tl_2017_us_uac10.shp", crs = 4979) # Read shapefile as an sf object## Reading layer `tl_2017_us_uac10' from data source `/Users/alanaschreibman/COVID-19_NO2/us_urban_shapefile/tl_2017_us_uac10.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3601 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -170.7892 ymin: -14.37378 xmax: 145.7909 ymax: 71.30684
## Geodetic CRS: WGS 84
#Filtering COVID data (from COVID exploratory analysis.)
owid <- read.csv(url("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"), na.strings = c("", "NA"), colClasses = c("date" = "Date"), header = TRUE)
owid.filtered <- owid %>%
rename(iso.code = iso_code, totcases = total_cases, totdeaths = total_deaths, totcases.mil = total_cases_per_million, totdeaths.mil = total_deaths_per_million, pop.density = population_density, med.age = median_age, older70 = aged_70_older, gdp.cap = gdp_per_capita, card.death = cardiovasc_death_rate, diab.prev = diabetes_prevalence, fem.smokers = female_smokers, male.smokers = male_smokers) %>%
select(c(iso.code:totcases, totdeaths, totcases.mil, totdeaths.mil, population, pop.density, med.age, older70, gdp.cap, card.death, diab.prev, fem.smokers, male.smokers)) %>% #Renamed variables and filtered to keep only those of interest.
filter(date == "2021-06-28") #Filtered out all but most recent date (most recent date is a running sum of total cases).
table(owid.filtered$continent, useNA = "ifany") #Data includes both continent and country-specific outcomes. Continent outcomes can be distinguished by "NA" in the continent column; the continent name is in the location column. ##
## Africa Asia Europe North America Oceania
## 54 48 51 33 16
## South America <NA>
## 12 9
cfr.df <- data.frame("case.fatality.rate"= owid.filtered$totdeaths / owid.filtered$totcases, "location" = owid.filtered$location)
owid.filtered <- merge(owid.filtered, cfr.df, by = "location")
rm(cfr.df)
owid.countries <- owid.filtered %>%
filter(!is.na(continent)) #Removed all rows with NA for continent, leaving behind only countries.
#Filtering to get weekly averages for new cases and new deaths
dates.covid.weekly <- seq(as.Date("2020-01-20"), as.Date("2021-07-04"), by=7)
owid.time.series.fxn <- function(t) {
df <- owid %>%
select(location, date, newcases = new_cases, newdeaths = new_deaths) %>%
filter(location=="World") %>%
filter(date %in% (as.Date(t):(as.Date(t) + 6)))
df
}
owid.time.series.list <- purrr::map(dates.covid.weekly, owid.time.series.fxn) #Creates list of data frames, each corresponding to a particular week during lockdown.
covid.mean.fxn <- function(x, column, lab) {
df <- data.frame(owid.time.series.list[[x]])
mean <- mean(df[[column]], na.rm = T)
Date <- as.Date(dates.covid.weekly[[x]])
Week <- paste0(as.character(as.Date(dates.covid.weekly[[x]])), " to ", as.character(as.Date(dates.covid.weekly[[x]]) + 6))
col.lab <- as.character(paste0("Mean_Weekly_", lab))
df <- data.frame(Date, Week, mean)
names(df)[3] <- as.character(paste0("Mean_Weekly_", lab))
df
}
x <- 1:76
mean.newcases.df <- purrr::pmap_dfr(list(x, "newcases", "Cases"), covid.mean.fxn)
mean.newdeaths.df <- purrr::pmap_dfr(list(x, "newdeaths", "Deaths"), covid.mean.fxn)setwd("./World_Rasters")
left <- raster('Left_Half_World_NO2.tif') %>%
flip(direction='y')
right <- raster('Right_Half_World_NO2.tif') %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x)
rm(left, right, x)
#Converting to data frame for plotting.
r.crop <- crop(world.NO2.r, extent(wrld_simpl))
r.crop <- mask(r.crop, wrld_simpl)
r.pts <- rasterToPoints(r.crop, spatial = TRUE)
world.NO2.df <- data.frame(r.pts)
rm(r.pts, r.crop)
myPalette <- colorRampPalette(brewer.pal(9, "YlOrRd"))
my_theme <- function() {
theme_minimal() + # shorthand for white background color
theme(axis.line = element_blank(), # further customization of theme components
axis.text = element_blank(), # remove x and y axis text and labels
axis.title = element_blank(),
panel.grid = element_line(color = "white"), # make grid lines invisible
legend.key.size = unit(0.5, "cm"), # increase size of legend
legend.text = element_text(size = 7), # increase legend text size
legend.title = element_text(size = 7)) # increase legend title size
}
ggplot() +
geom_raster(data = world.NO2.df, aes(x = x, y = y, fill = layer)) +
geom_polygon(data = wrld_simpl, aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("World NO2 Concentrations 07-10-2018 to 07-10-2019") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(x.min = -180, y.min = -90, x.max = 180, y.max = 90, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 2, model = 'WGS84') +
coord_equal()#R base plots.
plot(world.NO2.r)
plot(wrld_simpl, add = TRUE)wrld_simpl@data$NAME <- as.character(wrld_simpl@data$NAME)
#Changing shapefile country names to match COVID dataset:
wrld_simpl@data$NAME[wrld_simpl$NAME=="Brunei Darussalam"] <- "Brunei"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Czech Republic"] <- "Czechia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Democratic Republic of the Congo"] <- "Democratic Republic of Congo"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Faroe Islands"] <- "Faeroe Islands"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Iran (Islamic Republic of)"] <- "Iran"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Lao People's Democratic Republic"] <- "Laos"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Libyan Arab Jamahiriya"] <- "Libya"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Republic of Moldova"] <- "Moldova"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Burma"] <- "Myanmar"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Republic of"] <- "South Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Democratic People's Republic of"] <- "North Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Syrian Arab Republic"] <- "Syria"
wrld_simpl@data$NAME[wrld_simpl$NAME=="The former Yugoslav Republic of Macedonia"] <- "North Macedonia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Macau"] <- "Macao"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Micronesia, Federated States of"] <- "Micronesia (country)"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Timor-Leste"] <- "Timor"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Holy See (Vatican City)"] <- "Vatican"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Viet Nam"] <- "Vietnam"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Wallis and Futuna Islands"] <- "Wallis and Futuna"
#Missing from shapefiles but in OWID data: Kosovo, Tanzania, Eswatini
owid.countries$location[!(owid.countries$location %in% wrld_simpl@data$NAME)]## [1] "Curacao" "Eswatini"
## [3] "Kosovo" "Sint Maarten (Dutch part)"
## [5] "South Sudan" "Tanzania"
#Missing from OWID data but in shapefiles:
wrld_simpl@data$NAME[!(wrld_simpl@data$NAME%in% owid.countries$location)]## [1] "American Samoa"
## [2] "French Guiana"
## [3] "Falkland Islands (Malvinas)"
## [4] "Guam"
## [5] "North Korea"
## [6] "Martinique"
## [7] "Northern Mariana Islands"
## [8] "Mayotte"
## [9] "Aaland Islands"
## [10] "Norfolk Island"
## [11] "Cocos (Keeling) Islands"
## [12] "Antarctica"
## [13] "Bouvet Island"
## [14] "French Southern and Antarctic Lands"
## [15] "Heard Island and McDonald Islands"
## [16] "British Indian Ocean Territory"
## [17] "Christmas Island"
## [18] "United States Minor Outlying Islands"
## [19] "Reunion"
## [20] "Puerto Rico"
## [21] "Tokelau"
## [22] "Tonga"
## [23] "Tuvalu"
## [24] "Turkmenistan"
## [25] "United Republic of Tanzania"
## [26] "United States Virgin Islands"
## [27] "Swaziland"
## [28] "Guadeloupe"
## [29] "Netherlands Antilles"
## [30] "Pitcairn Islands"
## [31] "Palau"
## [32] "Saint Pierre and Miquelon"
## [33] "Saint Helena"
## [34] "Western Sahara"
## [35] "Svalbard"
## [36] "Saint Martin"
## [37] "Saint Barthelemy"
## [38] "South Georgia South Sandwich Islands"
#Function to extract mean NO2 values by country shapefile.
extraction.fxn <- function(x) {
r.vals <- raster::extract(world.NO2.r, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl@data[wrld_simpl$NAME==as.character(x), ], r.mean)
rm(r.mean, r.vals)
names(final.df)[5] <- "location"
names(final.df)[6] <- "Area"
names(final.df)[10] <- "Longitude"
names(final.df)[11] <- "Latitude"
names(final.df)[12] <- "NO2_Concentration"
final.df <- final.df %>% dplyr::select(location, Area, Longitude, Latitude, NO2_Concentration)
}
#Extracting mean NO2 values and adding to coordinates data to make final data frame.
country.shp.list <- wrld_simpl@data$NAME
x <- country.shp.list
total.df <- purrr::map_dfr(x, extraction.fxn)
#Read in IHME regions data.
IHME.regions.df <- read.csv(file = 'IHME_regions.csv', header = TRUE)
IHME.regions.df <- IHME.regions.df %>% rename(location = Countries) %>% dplyr::select("Super.region", "Regions", "location")
#Merged geospatial data set with COVID data (variables come from COVID exploratory analysis RMD file).
covid.NO2.df <- right_join(IHME.regions.df, total.df, by = "location")
covid.NO2.df <- full_join(covid.NO2.df, owid.countries, by = "location")
covid.NO2.df$Regions <- as.factor(covid.NO2.df$Regions)
covid.NO2.df$Super.region <- as.factor(covid.NO2.df$Super.region)#isolate country using extract and crop for both pop and NO2
#calculate product of population and pollution for each cell using overlay function
#sum all of these products
#divide this sum by tot pop for country from df
pop.den.r <- raster("gpw_v4_population_density/pop.density.tif") #Read in GPW v4 population raster data.
pop.weighting.fxn <- function(x) {
pop.cropped <- crop(pop.den.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
pop.cropped <- mask(pop.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
NO2.cropped <- crop(world.NO2.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
NO2.cropped <- mask(NO2.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
overlay.cropped <- overlay(pop.cropped, NO2.cropped, fun=function(x,y){(x*y)} )
sum <- cellStats(overlay.cropped, stat='sum')
mod.covid.df <- filter(covid.NO2.df, location == as.character(x))
weighted.NO2 <- sum / mod.covid.df$population %>% data.frame()
df <- wrld_simpl@data[wrld_simpl$NAME==as.character(x), ]
weighted.NO2.df <- merge(df, weighted.NO2)
names(weighted.NO2.df)[5] <- "location"
names(weighted.NO2.df)[12] <- "pop.weighted.NO2"
weighted.NO2.df <- weighted.NO2.df %>% dplyr::select(location, pop.weighted.NO2)
} #Function to weight NO2 by population for each country.
x <- country.shp.list[country.shp.list!="Antarctica"]
weighted.NO2.df <- purrr::map_dfr(x, pop.weighting.fxn)
weighted.covid.NO2.df <- full_join(covid.NO2.df, weighted.NO2.df, by = "location") #Join all data frames.
write.csv(weighted.covid.NO2.df, file = "weighted.covid.NO2.df.csv")
rm(weighted.covid.NO2.df)weighted.covid.NO2.df <- read.csv(file = 'weighted.covid.NO2.df.csv')
wrld.df <- as.data.frame(wrld_simpl)
wrld.fort <- fortify(wrld_simpl, region = "NAME")
names(wrld.fort)[6] <- "location" #Getting world shapefile into data frame for ggplot.
covid.NO2.plot.df <- full_join(wrld.fort, weighted.covid.NO2.df, by ="location")
covid.NO2.plot.df <- arrange(covid.NO2.plot.df, order) #Joined full weighted COVID data set with shapefile data set.
map <- ggplot(data = covid.NO2.plot.df, aes(x = long, y = lat, group = group))
map +
geom_polygon(aes(fill = NO2_Concentration), color = 'black', size = 0.1) +
my_theme() +
ggtitle("World Raw NO2 Concentrations 07-10-2018 to 07-10-2019") +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
coord_equal() #Plot of raw NO2 concentrations.
map +
geom_polygon(aes(fill = pop.weighted.NO2), color = 'black', size = 0.1) +
my_theme() +
ggtitle("World Weighted NO2 Concentrations 07-10-2018 to 07-10-2019") +
scale_fill_gradientn(name = "Weighted NO2 \nConcentration", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
coord_equal() #Plot of population weighted NO2. high.covid <- filter(weighted.covid.NO2.df, totcases.mil >= median(weighted.covid.NO2.df$totcases.mil, na.rm = T))
low.covid <- filter(weighted.covid.NO2.df, totcases.mil < median(weighted.covid.NO2.df$totcases.mil, na.rm = T))
high.NO2 <- filter(weighted.covid.NO2.df, pop.weighted.NO2 >= median(weighted.covid.NO2.df$pop.weighted.NO2, na.rm = T))
low.NO2 <- filter(weighted.covid.NO2.df, pop.weighted.NO2 < median(weighted.covid.NO2.df$pop.weighted.NO2, na.rm = T))#Statistics on world NO2 concentrations.
covid.NO2.df %>%
summarize(variable = "NO2_Concentration", mean_NO2 = mean(NO2_Concentration, na.rm = T), st_dev_cty = sd(NO2_Concentration, na.rm = T)) %>%
pander| variable | mean_NO2 | st_dev_cty |
|---|---|---|
| NO2_Concentration | 1.692e-05 | 1.103e-05 |
covid.NO2.df %>%
summarize(variable = "NO2_Concentration",
q0.2 = quantile(NO2_Concentration, 0.2, na.rm = T),
q0.4 = quantile(NO2_Concentration, 0.4, na.rm = T),
q0.6 = quantile(NO2_Concentration, 0.6, na.rm = T),
q0.8 = quantile(NO2_Concentration, 0.8, na.rm = T)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| NO2_Concentration | 8.389e-06 | 1.204e-05 | 1.728e-05 | 2.352e-05 |
#Histogram of NO2 concentrations.
covid.NO2.df %>%
ggplot(aes(NO2_Concentration)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$NO2_Concentration), lwd = 2) +
labs(title = "Distribution of Tropospheric NO2",
x = "NO2 Concentration (mol/m^2)",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Statistics on world COVID case statistics.
covid.NO2.df %>%
summarize(variable = "totcases.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
pander| variable | mean_cases | st_dev_cty |
|---|---|---|
| totcases.mil | 35928 | 3611805 |
covid.NO2.df %>%
summarize(variable = "totcases.mil",
q0.2 = quantile(totcases.mil, 0.2, na.rm = TRUE),
q0.4 = quantile(totcases.mil, 0.4, na.rm = TRUE),
q0.6 = quantile(totcases.mil, 0.6, na.rm = TRUE),
q0.8 = quantile(totcases.mil, 0.8, na.rm = TRUE)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| totcases.mil | 1662 | 8186 | 33123 | 73388 |
#Histogram of COVID case statistics.
max(covid.NO2.df$totcases.mil, na.rm = TRUE)## [1] 179667.4
covid.NO2.df %>%
ggplot(aes(totcases.mil)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$totcases.mil, na.rm = T), lwd = 2) +
labs(title = "Distribution of World COVID-19 Cases per Million",
x = "Total COVID-19 Cases per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Statistics on world COVID death statistics.
covid.NO2.df %>%
summarize(variable = "totdeaths.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
pander| variable | mean_cases | st_dev_cty |
|---|---|---|
| totdeaths.mil | 35928 | 3611805 |
covid.NO2.df %>%
summarize(variable = "totdeaths.mil",
q0.2 = quantile(totdeaths.mil, 0.2, na.rm = TRUE),
q0.4 = quantile(totdeaths.mil, 0.4, na.rm = TRUE),
q0.6 = quantile(totdeaths.mil, 0.6, na.rm = TRUE),
q0.8 = quantile(totdeaths.mil, 0.8, na.rm = TRUE)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| totdeaths.mil | 27.78 | 132.2 | 536.1 | 1310 |
#Histogram of COVID statistics.
max(covid.NO2.df$totdeaths.mil, na.rm = TRUE)## [1] 5820.087
covid.NO2.df %>%
ggplot(aes(totdeaths.mil)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$totdeaths.mil, na.rm = TRUE), lwd = 2) +
labs(title = "Distribution of World COVID-19 Deaths per Million",
x = "Total COVID-19 Deaths per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,5820.087,500)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Summary statistics, NO2, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_NO2 = mean(NO2_Concentration, na.rm = TRUE), sd_NO2 = sd(NO2_Concentration, na.rm = TRUE)) %>%
pander| Super.region | mean_NO2 | sd_NO2 |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 2.195e-05 | 7.39e-06 |
| High-income | 2.472e-05 | 1.7e-05 |
| Latin America and Caribbean | 1.132e-05 | 3.362e-06 |
| North Africa and the Middle East | 2.084e-05 | 9.85e-06 |
| South Asia | 2.998e-05 | 5.271e-06 |
| Southeast Asia, East Asia, and Oceania | 1.347e-05 | 1.135e-05 |
| Sub-Saharan Africa | 1.7e-05 | 6.46e-06 |
| NA | 1.149e-05 | 8.931e-06 |
#Summary statistics, COVID cases, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_cases.mil = mean(totcases.mil, na.rm = TRUE), sd_cases.mil = sd(totcases.mil, na.rm = TRUE)) %>%
pander| Super.region | mean_cases.mil | sd_cases.mil |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 68485 | 39342 |
| High-income | 63327 | 42290 |
| Latin America and Caribbean | 30927 | 26009 |
| North Africa and the Middle East | 42067 | 39554 |
| South Asia | 11243 | 9764 |
| Southeast Asia, East Asia, and Oceania | 16298 | 41810 |
| Sub-Saharan Africa | 6235 | 11443 |
| NA | 41135 | 48023 |
#Summary statistics, COVID deaths, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_deaths.mil = mean(totdeaths.mil, na.rm = TRUE), sd_deaths.mil = sd(totdeaths.mil, na.rm = TRUE)) %>%
pander| Super.region | mean_deaths.mil | sd_deaths.mil |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 1450 | 956.1 |
| High-income | 1014 | 728 |
| Latin America and Caribbean | 935.3 | 1165 |
| North Africa and the Middle East | 466.9 | 379.1 |
| South Asia | 157.5 | 135.2 |
| Southeast Asia, East Asia, and Oceania | 112.8 | 178.8 |
| Sub-Saharan Africa | 105.6 | 192.2 |
| NA | 765.5 | 906.5 |
#Histogram, NO2, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(NO2_Concentration)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of NO2 Concentrations Relative to Super Region",
x = "NO2 Concentration (mol/m^2)",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
scale_y_continuous(breaks = seq(0,12,2)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Histogram, COVID cases, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(totcases.mil)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of COVID Cases Relative to Super Region",
x = "COVID-19 Cases per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
scale_y_continuous(breaks = seq(0,32,8)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Histogram, COVID deaths, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(totdeaths.mil)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of COVID Deaths Relative to Super Region",
x = "COVID-19 Deaths per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,5820.087,500)) +
scale_y_continuous(breaks = seq(0,32,8)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Boxplot, NO2, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,NO2_Concentration)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of NO2 relative to Super Region",
x = "Super Region",
y = "NO2 Concentration (mol/m^2)") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0,8E-5,0.5E-5)) #Boxplot, COVID cases, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,totcases.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of COVID Cases Relative to Super Region",
x = "Super Region",
y = "COVID-19 Cases per Million") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0, 179667.4,10000))#Boxplot, COVID deaths, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,totdeaths.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of COVID Deaths Relative to Super Region",
x = "Super Region",
y = "COVID-19 Deaths per Million") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0,5820.087,500))covid.NO2.df %>%
select(NO2_Concentration, totcases.mil, totdeaths.mil, case.fatality.rate, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers) %>%
cor(use="pairwise.complete.obs") %>%
pander| Â | NO2_Concentration | totcases.mil | totdeaths.mil |
|---|---|---|---|
| NO2_Concentration | 1 | 0.3165 | 0.2235 |
| totcases.mil | 0.3165 | 1 | 0.7135 |
| totdeaths.mil | 0.2235 | 0.7135 | 1 |
| case.fatality.rate | -0.1053 | -0.1403 | 0.1426 |
| pop.density | 0.2045 | 0.0449 | -0.03193 |
| med.age | 0.3898 | 0.5935 | 0.5198 |
| card.death | -0.234 | -0.2975 | -0.2144 |
| diab.prev | -0.2264 | 0.02145 | -0.01058 |
| fem.smokers | 0.2739 | 0.5659 | 0.5681 |
| male.smokers | 0.04505 | 0.09914 | 0.07643 |
| Â | case.fatality.rate | pop.density | med.age |
|---|---|---|---|
| NO2_Concentration | -0.1053 | 0.2045 | 0.3898 |
| totcases.mil | -0.1403 | 0.0449 | 0.5935 |
| totdeaths.mil | 0.1426 | -0.03193 | 0.5198 |
| case.fatality.rate | 1 | -0.07071 | -0.1202 |
| pop.density | -0.07071 | 1 | 0.1484 |
| med.age | -0.1202 | 0.1484 | 1 |
| card.death | 0.223 | -0.1755 | -0.3436 |
| diab.prev | 0.02785 | 0.01338 | 0.1462 |
| fem.smokers | -0.05579 | -0.04709 | 0.6391 |
| male.smokers | -0.01647 | 0.0001981 | 0.1816 |
| Â | card.death | diab.prev | fem.smokers | male.smokers |
|---|---|---|---|---|
| NO2_Concentration | -0.234 | -0.2264 | 0.2739 | 0.04505 |
| totcases.mil | -0.2975 | 0.02145 | 0.5659 | 0.09914 |
| totdeaths.mil | -0.2144 | -0.01058 | 0.5681 | 0.07643 |
| case.fatality.rate | 0.223 | 0.02785 | -0.05579 | -0.01647 |
| pop.density | -0.1755 | 0.01338 | -0.04709 | 0.0001981 |
| med.age | -0.3436 | 0.1462 | 0.6391 | 0.1816 |
| card.death | 1 | 0.1519 | -0.1427 | 0.4285 |
| diab.prev | 0.1519 | 1 | 0.09079 | 0.2061 |
| fem.smokers | -0.1427 | 0.09079 | 1 | 0.2261 |
| male.smokers | 0.4285 | 0.2061 | 0.2261 | 1 |
#Visual representation of correlations.
covid.NO2.df %>%
select(NO2_Concentration, totcases.mil, totdeaths.mil, case.fatality.rate, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers) %>%
cor(use="pairwise.complete.obs") %>%
ggcorrplot(type = "lower", ggtheme = theme_minimal, colors = c("#6D9EC1","white","#E46726"),
show.diag = T,
lab = T, lab_size = 2, #shows correlation values and sets size of text
title = "Correlation Matrix for the covid.NO2 dataset",
legend.title = "Correlation Value",
outline.color = "white",
hc.order = T) #orders by variables most related#NO2 concentration vs COVID cases.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90810 -28883 -12346 22261 137741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.364e+04 5.741e+03 2.376 0.0185 *
## NO2_Concentration 1.211e+09 2.660e+08 4.551 9.62e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39170 on 186 degrees of freedom
## (64 observations deleted due to missingness)
## Multiple R-squared: 0.1002, Adjusted R-squared: 0.09536
## F-statistic: 20.71 on 1 and 186 DF, p-value: 9.621e-06
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) 2.313445e+03 2.496619e+04
## NO2_Concentration 6.858510e+08 1.735461e+09
#NO2 concentration vs COVID deaths.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.223474
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1484.4 -571.6 -372.9 387.4 5333.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.332e+02 1.284e+02 2.596 0.01021 *
## NO2_Concentration 1.797e+07 5.858e+06 3.067 0.00249 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 841 on 179 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.04994, Adjusted R-squared: 0.04463
## F-statistic: 9.409 on 1 and 179 DF, p-value: 0.002494
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 7.992266e+01 5.864711e+02
## NO2_Concentration 6.409097e+06 2.952679e+07
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")
#Function for bivariate analysis by region.
cases.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.cases <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
lm(totcases.mil ~ NO2_Concentration,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78182 -33098 9082 23797 114373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52150 13590 3.838 0.000573 ***
## NO2_Concentration 438853382 448383204 0.979 0.335282
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42320 on 31 degrees of freedom
## Multiple R-squared: 0.02998, Adjusted R-squared: -0.001316
## F-statistic: 0.9579 on 1 and 31 DF, p-value: 0.3353
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32144 -16754 -7505 2170 62157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43205 17159 2.518 0.0183 *
## NO2_Concentration -987233958 1439547473 -0.686 0.4989
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26160 on 26 degrees of freedom
## Multiple R-squared: 0.01777, Adjusted R-squared: -0.02001
## F-statistic: 0.4703 on 1 and 26 DF, p-value: 0.4989
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9740 -5594 -3283 -1344 48613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13633 6738 2.023 0.0496 *
## NO2_Concentration -454287265 399732943 -1.136 0.2624
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11400 on 41 degrees of freedom
## Multiple R-squared: 0.03054, Adjusted R-squared: 0.006895
## F-statistic: 1.292 on 1 and 41 DF, p-value: 0.2624
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43562 -23508 -12602 19146 94637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4333 17595 -0.246 0.8081
## NO2_Concentration 2226431334 766638920 2.904 0.0091 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33770 on 19 degrees of freedom
## Multiple R-squared: 0.3074, Adjusted R-squared: 0.271
## F-statistic: 8.434 on 1 and 19 DF, p-value: 0.009095
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## 1 2 3 4 5
## -9195 -7454 9986 10206 -3543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2922 31378 -0.093 0.932
## NO2_Concentration 472550653 1034079847 0.457 0.679
##
## Residual standard error: 10900 on 3 degrees of freedom
## Multiple R-squared: 0.06508, Adjusted R-squared: -0.2466
## F-statistic: 0.2088 on 1 and 3 DF, p-value: 0.6787
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36541 -25947 -10363 168 121820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.611e+04 1.930e+04 2.389 0.0296 *
## NO2_Concentration -1.636e+09 1.047e+09 -1.563 0.1376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44650 on 16 degrees of freedom
## Multiple R-squared: 0.1324, Adjusted R-squared: 0.07822
## F-statistic: 2.443 on 1 and 16 DF, p-value: 0.1376
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56913 -26986 -7456 21585 92022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.361e+03 2.029e+04 0.363 0.71967
## NO2_Concentration 2.772e+09 8.726e+08 3.177 0.00382 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34030 on 26 degrees of freedom
## Multiple R-squared: 0.2796, Adjusted R-squared: 0.2519
## F-statistic: 10.09 on 1 and 26 DF, p-value: 0.003815
deaths.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.deaths <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
lm(totdeaths.mil ~ NO2_Concentration,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1227.49 -777.20 60.23 659.17 1133.86
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 856.1 235.1 3.641 0.000979 ***
## NO2_Concentration 6198622.6 7757195.6 0.799 0.430324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 732.1 on 31 degrees of freedom
## Multiple R-squared: 0.02018, Adjusted R-squared: -0.01143
## F-statistic: 0.6385 on 1 and 31 DF, p-value: 0.4303
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -958.7 -603.5 -376.9 335.6 4779.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.352e+03 7.743e+02 1.746 0.0925 .
## NO2_Concentration -3.652e+07 6.496e+07 -0.562 0.5788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1180 on 26 degrees of freedom
## Multiple R-squared: 0.01201, Adjusted R-squared: -0.02599
## F-statistic: 0.3161 on 1 and 26 DF, p-value: 0.5788
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105.40 -86.65 -69.60 -11.17 915.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 126.8 114.9 1.104 0.276
## NO2_Concentration -1301481.1 6816614.0 -0.191 0.850
##
## Residual standard error: 194.5 on 41 degrees of freedom
## Multiple R-squared: 0.0008883, Adjusted R-squared: -0.02348
## F-statistic: 0.03645 on 1 and 41 DF, p-value: 0.8495
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -460.2 -226.6 -129.6 173.6 861.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.023e+02 1.805e+02 0.567 0.5775
## NO2_Concentration 1.749e+07 7.864e+06 2.225 0.0384 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 346.4 on 19 degrees of freedom
## Multiple R-squared: 0.2066, Adjusted R-squared: 0.1649
## F-statistic: 4.949 on 1 and 19 DF, p-value: 0.03843
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## 1 2 3 4 5
## -108.53 -143.81 122.43 149.24 -19.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.313e-01 4.400e+02 0.001 1.000
## NO2_Concentration 5.247e+06 1.450e+07 0.362 0.741
##
## Residual standard error: 152.9 on 3 degrees of freedom
## Multiple R-squared: 0.04182, Adjusted R-squared: -0.2776
## F-statistic: 0.1309 on 1 and 3 DF, p-value: 0.7414
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -172.33 -125.39 -27.02 49.87 515.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.137e+02 7.379e+01 2.897 0.0105 *
## NO2_Concentration -6.528e+06 4.001e+06 -1.632 0.1222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 170.7 on 16 degrees of freedom
## Multiple R-squared: 0.1427, Adjusted R-squared: 0.0891
## F-statistic: 2.663 on 1 and 16 DF, p-value: 0.1222
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1217.34 -503.24 13.44 411.01 1779.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -440.9 429.5 -1.026 0.314
## NO2_Concentration 85772376.4 18474796.8 4.643 8.63e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 720.4 on 26 degrees of freedom
## Multiple R-squared: 0.4533, Adjusted R-squared: 0.4322
## F-statistic: 21.55 on 1 and 26 DF, p-value: 8.633e-05
#NO2 concentration vs COVID cases.
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totcases.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs")## [1] 0.1993439
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57105 -30596 -15063 26642 151847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.782e+04 4.250e+03 6.547 5.57e-10 ***
## pop.weighted.NO2 4.406e+12 1.588e+12 2.774 0.0061 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40460 on 186 degrees of freedom
## (64 observations deleted due to missingness)
## Multiple R-squared: 0.03974, Adjusted R-squared: 0.03458
## F-statistic: 7.697 on 1 and 186 DF, p-value: 0.006095
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) 1.943685e+04 3.620427e+04
## pop.weighted.NO2 1.273021e+12 7.539258e+12
#NO2 concentration vs COVID deaths.
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totdeaths.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs")## [1] 0.176339
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1272.7 -577.4 -403.9 348.4 5236.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.139e+02 9.285e+01 5.534 1.1e-07 ***
## pop.weighted.NO2 8.161e+10 3.405e+10 2.397 0.0176 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 849.3 on 179 degrees of freedom
## (71 observations deleted due to missingness)
## Multiple R-squared: 0.0311, Adjusted R-squared: 0.02568
## F-statistic: 5.745 on 1 and 179 DF, p-value: 0.01757
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 3.306167e+02 6.970753e+02
## pop.weighted.NO2 1.441993e+10 1.487972e+11
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")
#Function for bivariate analysis by region.
cases.NO2.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(pop.weighted.NO2)) %>%
ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
}
NO2.lm.cases.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(pop.weighted.NO2)) %>%
lm(totcases.mil ~ pop.weighted.NO2,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2.w(i))
print(NO2.lm.cases.w(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64931 -40260 10414 30182 112615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.705e+04 1.263e+04 5.309 8.85e-06 ***
## pop.weighted.NO2 -1.385e+12 3.788e+12 -0.366 0.717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42870 on 31 degrees of freedom
## Multiple R-squared: 0.004294, Adjusted R-squared: -0.02783
## F-statistic: 0.1337 on 1 and 31 DF, p-value: 0.7171
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40870 -15279 -7329 5880 57138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.771e+04 7.714e+03 3.592 0.00134 **
## pop.weighted.NO2 4.663e+12 6.534e+12 0.714 0.48181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26140 on 26 degrees of freedom
## Multiple R-squared: 0.01921, Adjusted R-squared: -0.01851
## F-statistic: 0.5093 on 1 and 26 DF, p-value: 0.4818
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6736 -4698 -4014 -1864 50141
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.132e+03 3.848e+03 2.114 0.0407 *
## pop.weighted.NO2 -1.341e+12 2.418e+12 -0.554 0.5823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11540 on 41 degrees of freedom
## Multiple R-squared: 0.007442, Adjusted R-squared: -0.01677
## F-statistic: 0.3074 on 1 and 41 DF, p-value: 0.5823
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41663 -36786 -6911 24056 117570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.845e+04 1.178e+04 3.263 0.00409 **
## pop.weighted.NO2 1.228e+12 2.656e+12 0.462 0.64916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40360 on 19 degrees of freedom
## Multiple R-squared: 0.01112, Adjusted R-squared: -0.04093
## F-statistic: 0.2137 on 1 and 19 DF, p-value: 0.6492
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## 1 2 3 4 5
## -3081.2 -1309.4 11898.8 292.2 -7800.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.558e+03 1.220e+04 -0.537 0.628
## pop.weighted.NO2 5.292e+12 3.450e+12 1.534 0.223
##
## Residual standard error: 8440 on 3 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.2528
## F-statistic: 2.353 on 1 and 3 DF, p-value: 0.2226
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33382 -23708 -14100 -5892 123597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.482e+04 1.461e+04 2.383 0.0299 *
## pop.weighted.NO2 -9.426e+12 6.723e+12 -1.402 0.1800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45240 on 16 degrees of freedom
## Multiple R-squared: 0.1094, Adjusted R-squared: 0.05374
## F-statistic: 1.965 on 1 and 16 DF, p-value: 0.18
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56366 -25035 -83 21727 77966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.417e+04 1.960e+04 1.233 0.2285
## pop.weighted.NO2 1.522e+13 6.308e+12 2.413 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36240 on 26 degrees of freedom
## Multiple R-squared: 0.183, Adjusted R-squared: 0.1516
## F-statistic: 5.823 on 1 and 26 DF, p-value: 0.02317
deaths.NO2.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(pop.weighted.NO2)) %>%
ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
}
NO2.lm.deaths.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(pop.weighted.NO2)) %>%
lm(totdeaths.mil ~ pop.weighted.NO2,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2.w(i))
print(NO2.lm.deaths.w(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1053.79 -840.99 32.76 668.09 1157.04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.185e+02 2.168e+02 4.236 0.000189 ***
## pop.weighted.NO2 3.551e+10 6.503e+10 0.546 0.588907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 736.1 on 31 degrees of freedom
## Multiple R-squared: 0.009528, Adjusted R-squared: -0.02242
## F-statistic: 0.2982 on 1 and 31 DF, p-value: 0.5889
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1334.0 -611.0 -308.5 285.3 4895.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.607e+02 3.476e+02 2.188 0.0378 *
## pop.weighted.NO2 1.926e+11 2.944e+11 0.654 0.5188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1178 on 26 degrees of freedom
## Multiple R-squared: 0.01619, Adjusted R-squared: -0.02165
## F-statistic: 0.4278 on 1 and 26 DF, p-value: 0.5188
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -149.73 -103.62 -58.94 -6.57 808.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.223e+01 6.391e+01 0.661 0.512
## pop.weighted.NO2 4.480e+10 4.017e+10 1.115 0.271
##
## Residual standard error: 191.7 on 41 degrees of freedom
## Multiple R-squared: 0.02945, Adjusted R-squared: 0.005776
## F-statistic: 1.244 on 1 and 41 DF, p-value: 0.2712
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -425.56 -322.47 -72.51 285.37 788.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.127e+02 1.120e+02 3.684 0.00158 **
## pop.weighted.NO2 1.839e+10 2.525e+10 0.728 0.47528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 383.6 on 19 degrees of freedom
## Multiple R-squared: 0.02716, Adjusted R-squared: -0.02404
## F-statistic: 0.5305 on 1 and 19 DF, p-value: 0.4753
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## 1 2 3 4 5
## -27.14 -39.47 149.50 -11.88 -71.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.287e+02 1.442e+02 -0.893 0.438
## pop.weighted.NO2 8.508e+10 4.076e+10 2.087 0.128
##
## Residual standard error: 99.72 on 3 degrees of freedom
## Multiple R-squared: 0.5922, Adjusted R-squared: 0.4563
## F-statistic: 4.357 on 1 and 3 DF, p-value: 0.1281
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -152.68 -121.56 -54.04 75.22 526.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.648e+02 5.641e+01 2.921 0.010 **
## pop.weighted.NO2 -3.499e+10 2.596e+10 -1.348 0.196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 174.7 on 16 degrees of freedom
## Multiple R-squared: 0.102, Adjusted R-squared: 0.04587
## F-statistic: 1.817 on 1 and 16 DF, p-value: 0.1964
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1291.22 -692.02 -88.65 581.03 1886.11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.747e+02 4.858e+02 0.977 0.3374
## pop.weighted.NO2 3.351e+11 1.563e+11 2.143 0.0416 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 898.2 on 26 degrees of freedom
## Multiple R-squared: 0.1502, Adjusted R-squared: 0.1175
## F-statistic: 4.594 on 1 and 26 DF, p-value: 0.04162
#NO2 concentration vs population density.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = pop.density)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Population Density") +
labs(title = "World Population Density vs. NO2 Concentration") cor(covid.NO2.df$pop.density, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.2045035
NO2.lm.pop <- lm(pop.density ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.pop)##
## Call:
## lm(formula = pop.density ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2374.0 -516.0 -231.7 -3.8 19313.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -281.4 293.8 -0.958 0.33933
## NO2_Concentration 40668664.8 13904464.2 2.925 0.00385 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2095 on 196 degrees of freedom
## (54 observations deleted due to missingness)
## Multiple R-squared: 0.04182, Adjusted R-squared: 0.03693
## F-statistic: 8.555 on 1 and 196 DF, p-value: 0.003852
confint(NO2.lm.pop) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -860.8002 2.979961e+02
## NO2_Concentration 13247097.7417 6.809023e+07
#NO2 concentration vs median age.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = med.age)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Median Age") +
labs(title = "World Median Age by Country vs. NO2 Concentration") cor(covid.NO2.df$med.age, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3897704
NO2.lm.age <- lm(med.age ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.age)##
## Call:
## lm(formula = med.age ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2819 -7.6082 0.2393 6.8515 14.9838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.437e+01 1.237e+00 19.70 < 2e-16 ***
## NO2_Concentration 3.281e+05 5.746e+04 5.71 4.53e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.4 on 182 degrees of freedom
## (68 observations deleted due to missingness)
## Multiple R-squared: 0.1519, Adjusted R-squared: 0.1473
## F-statistic: 32.6 on 1 and 182 DF, p-value: 4.528e-08
confint(NO2.lm.age) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 21.92602 26.80561
## NO2_Concentration 214701.10076 441432.36905
#NO2 concentration vs GDP per capita.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = gdp.cap)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "GDP per Capita") +
labs(title = "World GDP per Capita vs. NO2 Concentration") cor(covid.NO2.df$gdp.cap, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3524915
NO2.lm.gdp <- lm(gdp.cap ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.gdp)##
## Call:
## lm(formula = gdp.cap ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30294 -13965 -5412 6907 88128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6948 2851 2.437 0.0158 *
## NO2_Concentration 680456994 133541576 5.095 8.62e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19450 on 183 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.1243, Adjusted R-squared: 0.1195
## F-statistic: 25.96 on 1 and 183 DF, p-value: 8.62e-07
confint(NO2.lm.gdp) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 1.322168e+03 12573.53
## NO2_Concentration 4.169779e+08 943936113.77
#NO2 concentration vs cardiovascular death.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = card.death)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Cardiovascular Death Rates") +
labs(title = "World Cardiovascular Death Rates vs. NO2 Concentration") cor(covid.NO2.df$card.death, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] -0.2339947
NO2.lm.card.death <- lm(card.death ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.card.death)##
## Call:
## lm(formula = card.death ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -204.00 -91.69 -20.65 69.54 462.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.124e+02 1.746e+01 17.894 < 2e-16 ***
## NO2_Concentration -2.656e+06 8.180e+05 -3.247 0.00139 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 119.5 on 182 degrees of freedom
## (68 observations deleted due to missingness)
## Multiple R-squared: 0.05475, Adjusted R-squared: 0.04956
## F-statistic: 10.54 on 1 and 182 DF, p-value: 0.001389
confint(NO2.lm.card.death) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 277.9489 346.8407
## NO2_Concentration -4270000.3363 -1041995.4937
#NO2 concentration vs diabetes prevalence.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = diab.prev)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Diabetes Prevalence Rates") +
labs(title = "World Diabetes Prevalence Rates vs. NO2 Concentration") cor(covid.NO2.df$diab.prev, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] -0.2263737
NO2.lm.diab <- lm(diab.prev ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.diab)##
## Call:
## lm(formula = diab.prev ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9664 -2.9376 -0.6631 2.3314 20.9495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.014e+01 6.546e-01 15.485 < 2e-16 ***
## NO2_Concentration -9.990e+04 3.110e+04 -3.212 0.00155 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.623 on 191 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.05125, Adjusted R-squared: 0.04628
## F-statistic: 10.32 on 1 and 191 DF, p-value: 0.001547
confint(NO2.lm.diab) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 8.845719e+00 11.42813
## NO2_Concentration -1.612537e+05 -38551.92814
#NO2 concentration vs COVID cases.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age, data = covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78822 -14001 -4366 9912 118443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.057e+04 9.193e+03 -4.413 1.80e-05 ***
## NO2_Concentration 2.755e+08 2.418e+08 1.139 0.2563
## gdp.cap 2.713e-01 1.633e-01 1.661 0.0986 .
## pop.density -7.380e+00 3.021e+00 -2.443 0.0156 *
## med.age 2.239e+03 3.517e+02 6.367 1.71e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31160 on 171 degrees of freedom
## (76 observations deleted due to missingness)
## Multiple R-squared: 0.3929, Adjusted R-squared: 0.3787
## F-statistic: 27.67 on 4 and 171 DF, p-value: < 2.2e-16
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -5.871916e+04 -2.242532e+04
## NO2_Concentration -2.018993e+08 7.528626e+08
## gdp.cap -5.116587e-02 5.936683e-01
## pop.density -1.334335e+01 -1.416295e+00
## med.age 1.545231e+03 2.933764e+03
#NO2 concentration vs COVID deaths.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.223474
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1545.1 -361.5 -88.4 211.0 5187.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.114e+03 2.111e+02 -5.276 4.06e-07 ***
## NO2_Concentration 4.054e+06 5.544e+06 0.731 0.4657
## gdp.cap -8.090e-03 3.700e-03 -2.187 0.0302 *
## pop.density -1.727e-01 6.841e-02 -2.524 0.0125 *
## med.age 6.239e+01 7.971e+00 7.827 5.46e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 705.4 on 167 degrees of freedom
## (80 observations deleted due to missingness)
## Multiple R-squared: 0.3394, Adjusted R-squared: 0.3235
## F-statistic: 21.45 on 4 and 167 DF, p-value: 2.72e-14
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) -1.530810e+03 -6.970901e+02
## NO2_Concentration -6.890922e+06 1.499838e+07
## gdp.cap -1.539412e-02 -7.854305e-04
## pop.density -3.077398e-01 -3.761957e-02
## med.age 4.665229e+01 7.812782e+01
#Function for multivariate analysis by Super.region.
cases.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.cases.reg <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases.reg(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80918 -20637 11757 21503 56252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.415e+05 6.915e+04 2.046 0.0506 .
## NO2_Concentration 6.753e+08 4.508e+08 1.498 0.1457
## gdp.cap -2.958e-01 4.276e-01 -0.692 0.4950
## pop.density -3.902e+00 5.323e+00 -0.733 0.4699
## med.age -2.064e+03 1.714e+03 -1.204 0.2391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37200 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1368, Adjusted R-squared: 0.008908
## F-statistic: 1.07 on 4 and 27 DF, p-value: 0.3908
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36446 -9842 98 11484 54489
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.364e+05 6.274e+04 -2.174 0.040781 *
## NO2_Concentration 2.462e+09 1.499e+09 1.642 0.114759
## gdp.cap -1.129e+00 9.615e-01 -1.174 0.252926
## pop.density -1.453e+02 3.437e+01 -4.228 0.000346 ***
## med.age 5.968e+03 2.104e+03 2.837 0.009595 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20580 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.4785, Adjusted R-squared: 0.3837
## F-statistic: 5.046 on 4 and 22 DF, p-value: 0.004862
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12928 -5002 -554 2599 30947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.130e+04 1.378e+04 -2.996 0.00486 **
## NO2_Concentration -4.610e+08 3.222e+08 -1.431 0.16091
## gdp.cap 2.543e-01 3.588e-01 0.709 0.48300
## pop.density -5.393e+00 1.110e+01 -0.486 0.62984
## med.age 2.794e+03 6.085e+02 4.592 4.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7897 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5783, Adjusted R-squared: 0.5327
## F-statistic: 12.69 on 4 and 37 DF, p-value: 1.35e-06
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28226 -10085 -1834 7016 51322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.839e+04 2.766e+04 -1.026 0.320947
## NO2_Concentration 6.601e+08 5.310e+08 1.243 0.232937
## gdp.cap 3.284e-01 1.977e-01 1.661 0.117453
## pop.density 5.675e+01 1.162e+01 4.886 0.000198 ***
## med.age 1.318e+03 1.058e+03 1.245 0.232072
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20020 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7965, Adjusted R-squared: 0.7423
## F-statistic: 14.68 on 4 and 15 DF, p-value: 4.54e-05
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.033e+06 NaN NaN NaN
## NO2_Concentration -8.654e+10 NaN NaN NaN
## gdp.cap -1.565e+02 NaN NaN NaN
## pop.density 1.933e+02 NaN NaN NaN
## med.age 2.022e+05 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69519 -7960 -2246 4542 88232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.202e+04 4.154e+04 0.289 0.7761
## NO2_Concentration -1.175e+09 1.168e+09 -1.006 0.3293
## gdp.cap 3.167e+00 1.454e+00 2.178 0.0447 *
## pop.density 5.813e+01 2.509e+01 2.317 0.0341 *
## med.age -8.473e+02 2.107e+03 -0.402 0.6929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31640 on 16 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.5774, Adjusted R-squared: 0.4718
## F-statistic: 5.465 on 4 and 16 DF, p-value: 0.005715
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32404 -21359 -7186 14258 90747
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.667e+04 4.108e+04 -2.110 0.0460 *
## NO2_Concentration 8.479e+08 1.654e+09 0.513 0.6130
## gdp.cap 5.136e-01 9.481e-01 0.542 0.5932
## pop.density 1.954e+01 2.755e+02 0.071 0.9441
## med.age 3.267e+03 1.374e+03 2.377 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30220 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4975, Adjusted R-squared: 0.4101
## F-statistic: 5.693 on 4 and 23 DF, p-value: 0.002456
deaths.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.deaths.reg <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths.reg(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1311.58 -566.69 9.46 532.01 1056.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.027e+03 1.346e+03 0.762 0.452
## NO2_Concentration 5.949e+06 8.777e+06 0.678 0.504
## gdp.cap -1.140e-02 8.326e-03 -1.369 0.182
## pop.density -7.383e-02 1.036e-01 -0.712 0.482
## med.age 8.725e+00 3.338e+01 0.261 0.796
##
## Residual standard error: 724.2 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1443, Adjusted R-squared: 0.01751
## F-statistic: 1.138 on 4 and 27 DF, p-value: 0.3597
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1308.9 -446.6 -12.6 258.7 4441.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.529e+03 3.356e+03 -0.753 0.4592
## NO2_Concentration 4.585e+07 8.021e+07 0.572 0.5733
## gdp.cap -3.965e-02 5.143e-02 -0.771 0.4490
## pop.density -4.825e+00 1.839e+00 -2.624 0.0155 *
## med.age 1.417e+02 1.125e+02 1.259 0.2212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1101 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.2587, Adjusted R-squared: 0.1239
## F-statistic: 1.919 on 4 and 22 DF, p-value: 0.1428
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -245.62 -64.36 -21.13 51.06 483.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.329e+02 2.373e+02 -3.931 0.000357 ***
## NO2_Concentration 1.203e+06 5.549e+06 0.217 0.829502
## gdp.cap 1.500e-03 6.179e-03 0.243 0.809558
## pop.density -4.707e-02 1.911e-01 -0.246 0.806802
## med.age 5.193e+01 1.048e+01 4.956 1.61e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 136 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5583, Adjusted R-squared: 0.5105
## F-statistic: 11.69 on 4 and 37 DF, p-value: 3.087e-06
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -386.42 -167.92 -62.21 131.12 585.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.698e+02 4.065e+02 -1.894 0.0777 .
## NO2_Concentration 1.645e+07 7.804e+06 2.108 0.0522 .
## gdp.cap -8.021e-03 2.906e-03 -2.760 0.0146 *
## pop.density 1.067e-01 1.707e-01 0.625 0.5413
## med.age 3.954e+01 1.556e+01 2.542 0.0226 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 294.3 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5256, Adjusted R-squared: 0.3991
## F-statistic: 4.154 on 4 and 15 DF, p-value: 0.01841
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.834e+04 NaN NaN NaN
## NO2_Concentration -1.223e+09 NaN NaN NaN
## gdp.cap -2.207e+00 NaN NaN NaN
## pop.density 2.771e+00 NaN NaN NaN
## med.age 2.841e+03 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -284.60 -51.08 7.73 68.13 335.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.906e+01 2.084e+02 0.379 0.711
## NO2_Concentration -7.100e+06 6.049e+06 -1.174 0.263
## gdp.cap 1.282e-02 7.330e-03 1.749 0.106
## pop.density 9.886e-02 1.210e-01 0.817 0.430
## med.age -1.136e+00 1.019e+01 -0.112 0.913
##
## Residual standard error: 150.4 on 12 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.4931, Adjusted R-squared: 0.3242
## F-statistic: 2.919 on 4 and 12 DF, p-value: 0.06712
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1333.94 -396.57 5.37 308.10 1184.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.037e+03 8.427e+02 -3.603 0.00150 **
## NO2_Concentration 4.754e+07 3.392e+07 1.401 0.17447
## gdp.cap -1.507e-02 1.945e-02 -0.775 0.44625
## pop.density 7.823e-01 5.651e+00 0.138 0.89111
## med.age 9.481e+01 2.819e+01 3.363 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 619.8 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.642, Adjusted R-squared: 0.5797
## F-statistic: 10.31 on 4 and 23 DF, p-value: 6.214e-05
#Weighted NO2 concentration vs COVID cases.
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. Population Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totcases.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs") ## [1] 0.1993439
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = weighted.covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77235 -14971 -4383 9250 121334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.896e+04 9.102e+03 -4.280 3.10e-05 ***
## pop.weighted.NO2 4.978e+11 1.386e+12 0.359 0.7200
## gdp.cap 2.795e-01 1.658e-01 1.686 0.0937 .
## pop.density -6.937e+00 3.105e+00 -2.234 0.0268 *
## med.age 2.314e+03 3.481e+02 6.647 3.86e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31260 on 171 degrees of freedom
## (76 observations deleted due to missingness)
## Multiple R-squared: 0.3888, Adjusted R-squared: 0.3745
## F-statistic: 27.19 on 4 and 171 DF, p-value: < 2.2e-16
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -5.692563e+04 -2.099168e+04
## pop.weighted.NO2 -2.238674e+12 3.234279e+12
## gdp.cap -4.779897e-02 6.068672e-01
## pop.density -1.306547e+01 -8.078500e-01
## med.age 1.626715e+03 3.001059e+03
#Weighted NO2 concentration vs COVID deaths.
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. Population Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totdeaths.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs") ## [1] 0.176339
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = weighted.covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1538.1 -355.5 -80.0 207.9 5168.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.092e+03 2.079e+02 -5.254 4.49e-07 ***
## pop.weighted.NO2 1.679e+10 3.155e+10 0.532 0.5953
## gdp.cap -8.183e-03 3.746e-03 -2.185 0.0303 *
## pop.density -1.614e-01 7.013e-02 -2.302 0.0226 *
## med.age 6.303e+01 7.875e+00 8.004 1.94e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 705.9 on 167 degrees of freedom
## (80 observations deleted due to missingness)
## Multiple R-squared: 0.3384, Adjusted R-squared: 0.3225
## F-statistic: 21.35 on 4 and 167 DF, p-value: 3.075e-14
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) -1.502670e+03 -6.817902e+02
## pop.weighted.NO2 -4.550259e+10 7.908621e+10
## gdp.cap -1.557841e-02 -7.881128e-04
## pop.density -2.998430e-01 -2.295015e-02
## med.age 4.748233e+01 7.857581e+01
#Function for multivariate analysis by Super.region.
cases.NO2 <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
}
NO2.lm.cases.reg <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases.reg(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63683 -26278 12377 24428 65629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.102e+05 6.927e+04 1.591 0.123
## pop.weighted.NO2 1.121e+12 3.965e+12 0.283 0.780
## gdp.cap -2.249e-01 4.425e-01 -0.508 0.615
## pop.density -3.908e+00 5.711e+00 -0.684 0.500
## med.age -1.030e+03 1.711e+03 -0.602 0.552
##
## Residual standard error: 38650 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.06781, Adjusted R-squared: -0.0703
## F-statistic: 0.491 on 4 and 27 DF, p-value: 0.7423
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39068 -9133 1164 9581 48875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.157e+05 4.694e+04 -2.465 0.021973 *
## pop.weighted.NO2 1.249e+13 5.813e+12 2.148 0.042948 *
## gdp.cap -1.188e+00 9.161e-01 -1.297 0.208219
## pop.density -1.291e+02 2.959e+01 -4.364 0.000248 ***
## med.age 5.776e+03 1.835e+03 3.147 0.004680 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19820 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.5161, Adjusted R-squared: 0.4281
## F-statistic: 5.865 on 4 and 22 DF, p-value: 0.002276
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12682 -4271 -1596 3197 32440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.181e+04 1.213e+04 -4.272 0.00013 ***
## pop.weighted.NO2 -7.071e+11 1.859e+12 -0.380 0.70582
## gdp.cap 1.171e-01 3.547e-01 0.330 0.74315
## pop.density -3.858e+00 1.158e+01 -0.333 0.74092
## med.age 3.013e+03 6.075e+02 4.960 1.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8096 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5567, Adjusted R-squared: 0.5088
## F-statistic: 11.62 on 4 and 37 DF, p-value: 3.286e-06
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30024 -10681 -136 11610 50896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.071e+04 2.837e+04 -0.730 0.477
## pop.weighted.NO2 1.822e+11 1.474e+12 0.124 0.903
## gdp.cap 3.569e-01 2.139e-01 1.668 0.116
## pop.density 6.330e+01 1.093e+01 5.790 3.57e-05 ***
## med.age 1.434e+03 1.107e+03 1.295 0.215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21020 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7758, Adjusted R-squared: 0.716
## F-statistic: 12.98 on 4 and 15 DF, p-value: 9.189e-05
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.913e+05 NaN NaN NaN
## pop.weighted.NO2 2.886e+13 NaN NaN NaN
## gdp.cap 1.277e+01 NaN NaN NaN
## pop.density 3.630e+01 NaN NaN NaN
## med.age 8.563e+02 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -72601 -8795 1065 6032 84944
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.763e+03 4.106e+04 0.238 0.8151
## pop.weighted.NO2 -6.330e+12 5.451e+12 -1.161 0.2626
## gdp.cap 3.498e+00 1.362e+00 2.568 0.0206 *
## pop.density 5.596e+01 2.514e+01 2.226 0.0407 *
## med.age -1.111e+03 1.861e+03 -0.597 0.5587
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31330 on 16 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.5856, Adjusted R-squared: 0.482
## F-statistic: 5.653 on 4 and 16 DF, p-value: 0.004944
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36674 -20261 -6478 15784 91154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.948e+04 4.153e+04 -2.155 0.0419 *
## pop.weighted.NO2 2.303e+12 6.795e+12 0.339 0.7377
## gdp.cap 5.984e-01 9.407e-01 0.636 0.5310
## pop.density 1.212e+02 1.666e+02 0.727 0.4743
## med.age 3.421e+03 1.332e+03 2.568 0.0172 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30310 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4943, Adjusted R-squared: 0.4064
## F-statistic: 5.621 on 4 and 23 DF, p-value: 0.002629
deaths.NO2 <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
}
NO2.lm.deaths.reg <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths.reg(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1110.2 -598.4 2.3 522.0 1135.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.560e+02 1.308e+03 0.578 0.568
## pop.weighted.NO2 1.107e+10 7.489e+10 0.148 0.884
## gdp.cap -1.077e-02 8.357e-03 -1.288 0.209
## pop.density -7.344e-02 1.079e-01 -0.681 0.502
## med.age 1.762e+01 3.231e+01 0.545 0.590
##
## Residual standard error: 730.1 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.001597
## F-statistic: 1.012 on 4 and 27 DF, p-value: 0.4185
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1363.0 -363.4 -183.6 225.8 4360.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.693e+03 2.557e+03 -1.053 0.30367
## pop.weighted.NO2 3.474e+11 3.166e+11 1.097 0.28446
## gdp.cap -4.590e-02 4.990e-02 -0.920 0.36764
## pop.density -4.620e+00 1.612e+00 -2.866 0.00897 **
## med.age 1.558e+02 9.998e+01 1.558 0.13340
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1080 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.2867, Adjusted R-squared: 0.157
## F-statistic: 2.211 on 4 and 22 DF, p-value: 0.101
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -216.20 -63.95 -15.86 42.52 349.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.099e+03 1.895e+02 -5.801 1.17e-06 ***
## pop.weighted.NO2 7.003e+10 2.904e+10 2.412 0.021 *
## gdp.cap -3.397e-04 5.541e-03 -0.061 0.951
## pop.density 5.389e-02 1.809e-01 0.298 0.767
## med.age 5.613e+01 9.491e+00 5.914 8.21e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 126.5 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.6178, Adjusted R-squared: 0.5765
## F-statistic: 14.95 on 4 and 37 DF, p-value: 2.328e-07
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -474.34 -169.86 -21.34 173.70 604.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.318e+02 4.240e+02 -1.490 0.1570
## pop.weighted.NO2 3.167e+10 2.202e+10 1.438 0.1709
## gdp.cap -8.418e-03 3.197e-03 -2.633 0.0188 *
## pop.density 2.847e-01 1.634e-01 1.742 0.1019
## med.age 4.244e+01 1.654e+01 2.566 0.0215 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 314.1 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4595, Adjusted R-squared: 0.3154
## F-statistic: 3.188 on 4 and 15 DF, p-value: 0.04403
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.317e+03 NaN NaN NaN
## pop.weighted.NO2 4.078e+11 NaN NaN NaN
## gdp.cap 1.846e-01 NaN NaN NaN
## pop.density 5.531e-01 NaN NaN NaN
## med.age -3.957e+00 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -295.53 -74.64 -10.78 52.95 324.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.662e+01 2.099e+02 0.317 0.7565
## pop.weighted.NO2 -3.257e+10 2.673e+10 -1.219 0.2464
## gdp.cap 1.529e-02 6.695e-03 2.284 0.0414 *
## pop.density 9.516e-02 1.211e-01 0.786 0.4473
## med.age -3.381e+00 9.041e+00 -0.374 0.7149
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 149.8 on 12 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.4972, Adjusted R-squared: 0.3296
## F-statistic: 2.966 on 4 and 12 DF, p-value: 0.06438
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1125.3 -391.5 -114.2 302.0 1216.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.104e+03 8.845e+02 -3.510 0.00188 **
## pop.weighted.NO2 1.675e+10 1.447e+11 0.116 0.90887
## gdp.cap -3.280e-03 2.004e-02 -0.164 0.87139
## pop.density 7.099e+00 3.548e+00 2.001 0.05732 .
## med.age 1.052e+02 2.837e+01 3.707 0.00116 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 645.5 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6116, Adjusted R-squared: 0.5441
## F-statistic: 9.056 on 4 and 23 DF, p-value: 0.0001518
fatality.rate.NO2 <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = pop.weighted.NO2, y = case.fatality.rate)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
}
NO2.lm.fatality.reg <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
lm(case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(fatality.rate.NO2(i))
print(NO2.lm.fatality.reg(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0110282 -0.0041976 0.0000133 0.0047031 0.0147586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.920e-03 1.209e-02 -0.159 0.8750
## pop.weighted.NO2 -2.468e+04 6.923e+05 -0.036 0.9718
## gdp.cap -1.234e-07 7.726e-08 -1.597 0.1219
## pop.density -1.622e-06 9.971e-07 -1.627 0.1154
## med.age 5.934e-04 2.987e-04 1.987 0.0572 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006749 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3271, Adjusted R-squared: 0.2274
## F-statistic: 3.281 on 4 and 27 DF, p-value: 0.02577
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.022873 -0.008812 -0.005646 0.003473 0.060924
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.035e-02 5.049e-02 0.601 0.554
## pop.weighted.NO2 4.130e+06 6.252e+06 0.661 0.516
## gdp.cap 7.909e-09 9.854e-07 0.008 0.994
## pop.density -4.125e-05 3.182e-05 -1.296 0.208
## med.age -5.371e-06 1.974e-03 -0.003 0.998
##
## Residual standard error: 0.02132 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.1354, Adjusted R-squared: -0.02178
## F-statistic: 0.8614 on 4 and 22 DF, p-value: 0.5023
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.016674 -0.007111 -0.001484 0.007006 0.022902
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.306e-02 1.541e-02 1.497 0.143
## pop.weighted.NO2 8.546e+05 2.361e+06 0.362 0.719
## gdp.cap -3.823e-07 4.505e-07 -0.849 0.402
## pop.density -1.137e-05 1.471e-05 -0.773 0.444
## med.age -1.533e-04 7.717e-04 -0.199 0.844
##
## Residual standard error: 0.01028 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0541, Adjusted R-squared: -0.04816
## F-statistic: 0.529 on 4 and 37 DF, p-value: 0.7151
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.045443 -0.014412 -0.001374 0.008952 0.135152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.251e-01 5.582e-02 2.241 0.0406 *
## pop.weighted.NO2 -1.800e+05 2.899e+06 -0.062 0.9513
## gdp.cap -2.374e-07 4.209e-07 -0.564 0.5810
## pop.density -1.406e-05 2.151e-05 -0.654 0.5233
## med.age -3.051e-03 2.177e-03 -1.401 0.1814
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04135 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2886, Adjusted R-squared: 0.09894
## F-statistic: 1.522 on 4 and 15 DF, p-value: 0.2461
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.268e-02 NaN NaN NaN
## pop.weighted.NO2 1.367e+07 NaN NaN NaN
## gdp.cap 9.091e-06 NaN NaN NaN
## pop.density 3.624e-05 NaN NaN NaN
## med.age -5.222e-03 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.038439 -0.031820 -0.012866 0.006726 0.200574
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.375e-02 9.051e-02 0.594 0.564
## pop.weighted.NO2 -1.716e+06 1.152e+07 -0.149 0.884
## gdp.cap -1.943e-06 2.886e-06 -0.673 0.514
## pop.density -2.409e-05 5.222e-05 -0.461 0.653
## med.age 1.009e-04 3.898e-03 0.026 0.980
##
## Residual standard error: 0.06459 on 12 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.09934, Adjusted R-squared: -0.2009
## F-statistic: 0.3309 on 4 and 12 DF, p-value: 0.8519
##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.015968 -0.004985 -0.002020 0.006135 0.020052
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.611e-02 1.295e-02 -2.016 0.0556 .
## pop.weighted.NO2 3.730e+05 2.119e+06 0.176 0.8618
## gdp.cap -3.885e-07 2.934e-07 -1.324 0.1985
## pop.density 5.305e-05 5.195e-05 1.021 0.3178
## med.age 1.257e-03 4.155e-04 3.027 0.0060 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.009454 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.389, Adjusted R-squared: 0.2828
## F-statistic: 3.661 on 4 and 23 DF, p-value: 0.01895
high <- data.frame("group" = rep("high", length.out = nrow(high.covid)))
high.covid <- cbind(high, high.covid)
low <- data.frame("group" = rep("low", length.out = nrow(low.covid)))
low.covid <- cbind(low, low.covid)
df.covid <- rbind(high.covid, low.covid)
#Boxplot of NO2 Concentrations by case groups.
ggplot(df.covid, aes(group,NO2_Concentration)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "NO2 Concentrations Grouped by COVID Case Levels", x = "Case Count Group", y = "NO2 Concentration (mol/m^2)") +
stat_compare_means(method = "t.test")#Boxplot of weighted NO2 Concentrations by case groups.
ggplot(df.covid, aes(group,pop.weighted.NO2)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Weighted NO2 Concentrations Grouped by COVID Case Levels", x = "Case Count Group", y = "Weighted NO2 Concentration") +
stat_compare_means(method = "t.test")rm(df.covid)
high <- data.frame("group" = rep("high", length.out = nrow(high.NO2)))
high.NO2 <- cbind(high, high.NO2)
low <- data.frame("group" = rep("low", length.out = nrow(low.NO2)))
low.NO2<- cbind(low, low.NO2)
df.NO2 <- rbind(high.NO2, low.NO2)
#Boxplot of COVID cases by NO2 groups.
ggplot(df.NO2, aes(group,totcases.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "COVID Cases per Million Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Cases per Million") +
stat_compare_means(method = "t.test")#Boxplot of COVID deaths by NO2 groups.
ggplot(df.NO2, aes(group,totdeaths.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "COVID Deaths per Million Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Deaths per Million") +
stat_compare_means(method = "t.test")#Boxplot of COVID CFR by NO2 groups.
ggplot(df.NO2, aes(group,case.fatality.rate)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "COVID Case Fatality Rate Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Case Fatality Rate") +
stat_compare_means(method = "t.test")rm(df.NO2)#High NO2.
#Weighted NO2 concentration vs COVID cases.
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "High NO2: World COVID-19 Cases per Million vs. Population Weighted NO2 Concentration") cor(high.NO2$totcases.mil, high.NO2$pop.weighted.NO2, use = "complete.obs") ## [1] 0.2947513
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = high.NO2)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = high.NO2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76771 -13402 -898 10680 96781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.693e+04 1.137e+04 -4.130 7.70e-05 ***
## pop.weighted.NO2 2.568e+12 1.934e+12 1.328 0.187
## gdp.cap 7.325e-02 2.134e-01 0.343 0.732
## pop.density -2.557e+01 1.886e+01 -1.356 0.178
## med.age 2.557e+03 3.906e+02 6.548 2.79e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29550 on 97 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.4757, Adjusted R-squared: 0.4541
## F-statistic: 22 on 4 and 97 DF, p-value: 6.055e-13
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -6.948951e+04 -2.437597e+04
## pop.weighted.NO2 -1.269957e+12 6.406016e+12
## gdp.cap -3.502009e-01 4.966916e-01
## pop.density -6.300107e+01 1.186173e+01
## med.age 1.782191e+03 3.332507e+03
#Weighted NO2 concentration vs COVID deaths.
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "High NO2: World COVID-19 Deaths per Million vs. Population Weighted NO2 Concentration") cor(high.NO2$totdeaths.mil, high.NO2$pop.weighted.NO2, use = "complete.obs") ## [1] 0.1381325
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = high.NO2,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = high.NO2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1527.50 -368.98 -2.76 297.70 1542.15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.233e+03 2.458e+02 -5.016 2.38e-06 ***
## pop.weighted.NO2 1.453e+10 4.183e+10 0.347 0.7291
## gdp.cap -1.025e-02 4.615e-03 -2.220 0.0287 *
## pop.density -6.787e-01 4.079e-01 -1.664 0.0994 .
## med.age 7.137e+01 8.448e+00 8.448 2.97e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 639.1 on 97 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.4783, Adjusted R-squared: 0.4568
## F-statistic: 22.23 on 4 and 97 DF, p-value: 4.771e-13
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) -1.721059e+03 -7.452248e+02
## pop.weighted.NO2 -6.848890e+10 9.754728e+10
## gdp.cap -1.940599e-02 -1.087171e-03
## pop.density -1.488362e+00 1.309679e-01
## med.age 5.460066e+01 8.813500e+01
#Weighted NO2 concentration vs COVID CFR.
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = case.fatality.rate)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Case Fatality Rate") +
labs(title = "High NO2: COVID-19 Case Fatality Rate vs. Population Weighted NO2 Concentration") cor(high.NO2$case.fatality.rate, high.NO2$pop.weighted.NO2, use = "complete.obs") ## [1] -0.03359483
NO2.lm.deaths <- lm(case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = high.NO2,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap +
## pop.density + med.age, data = high.NO2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.021921 -0.007799 -0.003010 0.005463 0.072667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.559e-02 5.383e-03 2.897 0.00466 **
## pop.weighted.NO2 5.512e+05 9.159e+05 0.602 0.54866
## gdp.cap -2.038e-07 1.010e-07 -2.017 0.04651 *
## pop.density -1.236e-05 8.932e-06 -1.383 0.16971
## med.age 2.710e-04 1.850e-04 1.465 0.14612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01399 on 97 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.05531, Adjusted R-squared: 0.01635
## F-statistic: 1.42 on 4 and 97 DF, p-value: 0.2332
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 4.909463e-03 2.627647e-02
## pop.weighted.NO2 -1.266530e+06 2.369022e+06
## gdp.cap -4.043276e-07 -3.216153e-09
## pop.density -3.008592e-05 5.371159e-06
## med.age -9.611847e-05 6.381541e-04
#Function for multivariate analysis by Super.region.
cases.NO2 <- function(x) {
high.NO2 %>%
filter(Super.region == x) %>%
ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
}
NO2.lm.cases.reg <- function(x) {
high.NO2 %>%
filter(Super.region == x) %>%
lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases.reg(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65602 -18514 6059 25655 47547
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.654e+05 7.329e+04 2.257 0.0367 *
## pop.weighted.NO2 1.919e+12 6.745e+12 0.285 0.7793
## gdp.cap -4.519e-01 6.746e-01 -0.670 0.5115
## pop.density -2.695e+00 6.568e+01 -0.041 0.9677
## med.age -2.148e+03 1.820e+03 -1.180 0.2534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35880 on 18 degrees of freedom
## Multiple R-squared: 0.1035, Adjusted R-squared: -0.09568
## F-statistic: 0.5197 on 4 and 18 DF, p-value: 0.7224
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## 1 2 3 4 5 6 7
## 8777 -1333 -2963 3990 -41812 22578 10764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.756e+04 1.701e+05 -0.515 0.658
## pop.weighted.NO2 3.978e+13 5.529e+13 0.719 0.547
## gdp.cap 2.374e-01 3.175e+00 0.075 0.947
## pop.density -2.936e+02 1.937e+02 -1.516 0.269
## med.age 3.341e+03 5.419e+03 0.617 0.600
##
## Residual standard error: 35200 on 2 degrees of freedom
## Multiple R-squared: 0.6837, Adjusted R-squared: 0.05123
## F-statistic: 1.081 on 4 and 2 DF, p-value: 0.5325
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5341.0 -1442.3 -16.3 1741.1 4790.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.872e+04 7.407e+03 -3.878 0.00133 **
## pop.weighted.NO2 1.436e+12 1.349e+12 1.064 0.30311
## gdp.cap 1.088e+00 3.367e-01 3.232 0.00521 **
## pop.density -5.255e+00 6.990e+00 -0.752 0.46307
## med.age 1.455e+03 4.584e+02 3.173 0.00590 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3055 on 16 degrees of freedom
## Multiple R-squared: 0.9084, Adjusted R-squared: 0.8855
## F-statistic: 39.68 on 4 and 16 DF, p-value: 4.086e-08
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25681.6 -14474.6 134.7 14315.8 19752.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.628e+03 4.620e+04 -0.122 0.906
## pop.weighted.NO2 -5.335e+12 6.591e+12 -0.809 0.442
## gdp.cap 6.226e-01 4.211e-01 1.478 0.178
## pop.density 2.113e+02 1.430e+02 1.478 0.178
## med.age 7.712e+02 1.601e+03 0.482 0.643
##
## Residual standard error: 18510 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6966, Adjusted R-squared: 0.545
## F-statistic: 4.593 on 4 and 8 DF, p-value: 0.03207
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.913e+05 NaN NaN NaN
## pop.weighted.NO2 2.886e+13 NaN NaN NaN
## gdp.cap 1.277e+01 NaN NaN NaN
## pop.density 3.630e+01 NaN NaN NaN
## med.age 8.563e+02 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## 1 3 4 5 6 7 8
## -208.35 2990.80 -2634.82 -50.89 919.46 969.02 -1985.22
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.754e+04 7.185e+03 2.442 0.1347
## pop.weighted.NO2 -7.075e+11 1.079e+12 -0.656 0.5793
## gdp.cap 1.171e+00 1.936e-01 6.046 0.0263 *
## pop.density 2.474e+01 1.226e+01 2.018 0.1811
## med.age -8.820e+02 3.031e+02 -2.910 0.1006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3291 on 2 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9516, Adjusted R-squared: 0.8547
## F-statistic: 9.825 on 4 and 2 DF, p-value: 0.09451
##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36849 -21174 -4421 16219 92706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.879e+04 4.657e+04 -2.121 0.0460 *
## pop.weighted.NO2 1.712e+12 7.678e+12 0.223 0.8257
## gdp.cap 5.295e-01 9.843e-01 0.538 0.5963
## pop.density 1.702e+02 1.817e+02 0.936 0.3597
## med.age 3.647e+03 1.409e+03 2.588 0.0172 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31230 on 21 degrees of freedom
## Multiple R-squared: 0.4877, Adjusted R-squared: 0.3902
## F-statistic: 4.999 on 4 and 21 DF, p-value: 0.005452
deaths.NO2 <- function(x) {
high.NO2 %>%
filter(Super.region == x) %>%
ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
}
NO2.lm.deaths.reg <- function(x) {
high.NO2 %>%
filter(Super.region == x) %>%
lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths.reg(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1147.6 -548.2 172.5 397.8 1104.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.953e+03 1.528e+03 1.278 0.218
## pop.weighted.NO2 -1.882e+10 1.407e+11 -0.134 0.895
## gdp.cap -2.058e-02 1.407e-02 -1.463 0.161
## pop.density -4.121e-01 1.370e+00 -0.301 0.767
## med.age 4.738e+00 3.796e+01 0.125 0.902
##
## Residual standard error: 748.2 on 18 degrees of freedom
## Multiple R-squared: 0.1215, Adjusted R-squared: -0.07369
## F-statistic: 0.6225 on 4 and 18 DF, p-value: 0.6523
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## 1 2 3 4 5 6 7
## -151.18 26.53 45.48 27.39 191.80 -85.56 -54.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.247e+03 9.264e+02 -2.426 0.1361
## pop.weighted.NO2 6.239e+11 3.011e+11 2.072 0.1740
## gdp.cap -4.191e-02 1.729e-02 -2.424 0.1362
## pop.density -7.541e+00 1.055e+00 -7.149 0.0190 *
## med.age 1.381e+02 2.950e+01 4.681 0.0427 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 191.6 on 2 degrees of freedom
## Multiple R-squared: 0.9843, Adjusted R-squared: 0.9529
## F-statistic: 31.35 on 4 and 2 DF, p-value: 0.03115
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -153.54 -67.88 -45.37 84.55 229.88
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.691e+02 2.614e+02 -3.707 0.00192 **
## pop.weighted.NO2 1.182e+11 4.763e+10 2.483 0.02451 *
## gdp.cap 1.448e-02 1.188e-02 1.219 0.24056
## pop.density -7.198e-02 2.467e-01 -0.292 0.77423
## med.age 4.214e+01 1.618e+01 2.605 0.01916 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 107.8 on 16 degrees of freedom
## Multiple R-squared: 0.8294, Adjusted R-squared: 0.7868
## F-statistic: 19.45 on 4 and 16 DF, p-value: 5.47e-06
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -483.37 -148.69 2.96 168.01 431.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.381e+03 8.029e+02 -1.720 0.1238
## pop.weighted.NO2 9.745e+10 1.145e+11 0.851 0.4196
## gdp.cap -1.179e-02 7.317e-03 -1.612 0.1457
## pop.density -1.071e+00 2.484e+00 -0.431 0.6777
## med.age 6.608e+01 2.782e+01 2.375 0.0449 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 321.7 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5773, Adjusted R-squared: 0.366
## F-statistic: 2.732 on 4 and 8 DF, p-value: 0.1056
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.317e+03 NaN NaN NaN
## pop.weighted.NO2 4.078e+11 NaN NaN NaN
## gdp.cap 1.846e-01 NaN NaN NaN
## pop.density 5.531e-01 NaN NaN NaN
## med.age -3.957e+00 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## 1 3 4 5 6 7 8
## 2.972 21.518 -18.880 -18.419 44.625 34.423 -66.239
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.044e+02 1.443e+02 1.416 0.292
## pop.weighted.NO2 -1.320e+09 2.166e+10 -0.061 0.957
## gdp.cap 8.808e-03 3.887e-03 2.266 0.152
## pop.density 4.856e-01 2.462e-01 1.973 0.187
## med.age -1.038e+01 6.085e+00 -1.707 0.230
##
## Residual standard error: 66.07 on 2 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8163, Adjusted R-squared: 0.4488
## F-statistic: 2.221 on 4 and 2 DF, p-value: 0.3337
##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1130.4 -334.2 -105.7 371.4 1277.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.218e+03 9.651e+02 -3.335 0.00314 **
## pop.weighted.NO2 -3.946e+10 1.591e+11 -0.248 0.80653
## gdp.cap -3.597e-03 2.040e-02 -0.176 0.86170
## pop.density 8.621e+00 3.766e+00 2.289 0.03253 *
## med.age 1.103e+02 2.920e+01 3.776 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 647.1 on 21 degrees of freedom
## Multiple R-squared: 0.6097, Adjusted R-squared: 0.5353
## F-statistic: 8.201 on 4 and 21 DF, p-value: 0.0003795
#Low NO2.
#Weighted NO2 concentration vs COVID cases.
ggplot(data = low.NO2, aes(x = pop.weighted.NO2, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "Low NO2: World COVID-19 Cases per Million vs. Population Weighted NO2 Concentration") cor(low.NO2$totcases.mil, low.NO2$pop.weighted.NO2, use = "complete.obs") ## [1] -0.2395034
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = low.NO2)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = low.NO2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60468 -20076 -4999 9283 112386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.314e+04 2.048e+04 -0.641 0.5234
## pop.weighted.NO2 -1.057e+13 8.047e+12 -1.313 0.1934
## gdp.cap 5.251e-01 2.860e-01 1.836 0.0706 .
## pop.density -8.421e+00 3.803e+00 -2.214 0.0301 *
## med.age 1.554e+03 7.553e+02 2.057 0.0435 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33570 on 69 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.2558, Adjusted R-squared: 0.2126
## F-statistic: 5.928 on 4 and 69 DF, p-value: 0.0003683
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -5.399559e+04 2.772475e+04
## pop.weighted.NO2 -2.662089e+13 5.484073e+12
## gdp.cap -4.540096e-02 1.095648e+00
## pop.density -1.600856e+01 -8.339348e-01
## med.age 4.683803e+01 3.060399e+03
#Weighted NO2 concentration vs COVID deaths.
ggplot(data = low.NO2, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "Low NO2: World COVID-19 Deaths per Million vs. Population Weighted NO2 Concentration") cor(low.NO2$totdeaths.mil, low.NO2$pop.weighted.NO2, use = "complete.obs") ## [1] -0.04312052
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = low.NO2,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density +
## med.age, data = low.NO2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -890.4 -294.8 -191.3 148.2 5224.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.747e+02 5.134e+02 -1.314 0.1935
## pop.weighted.NO2 1.527e+11 2.046e+11 0.746 0.4582
## gdp.cap -4.328e-03 6.753e-03 -0.641 0.5238
## pop.density -1.031e-01 8.997e-02 -1.146 0.2560
## med.age 4.109e+01 1.817e+01 2.262 0.0271 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 792 on 65 degrees of freedom
## (34 observations deleted due to missingness)
## Multiple R-squared: 0.09308, Adjusted R-squared: 0.03726
## F-statistic: 1.668 on 4 and 65 DF, p-value: 0.1682
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) -1.700069e+03 3.507511e+02
## pop.weighted.NO2 -2.559880e+11 5.614107e+11
## gdp.cap -1.781470e-02 9.158708e-03
## pop.density -2.827869e-01 7.657921e-02
## med.age 4.807252e+00 7.736744e+01
#Getting satellite data specifically for this time frame.
July_Oct2018.sat.r <- raster('US_ground_NO2/US_NO2_2018-07-01_start.tif') %>%
flip(direction='y')
July_Oct2018.sat.r <- crop(July_Oct2018.sat.r, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
July_Oct2018.sat.r <- mask(July_Oct2018.sat.r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
#Direct conversion from NO2 raster to data frame while filtering by shapefile.
continental.US.fxn <- function(x) {
r1 <- raster(x) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
r1 <- mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- rasterToPoints(r1, spatial = TRUE)
July_Oct2018.sat <- r1 %>%
data.frame() %>%
select(Latitude = 3, Longitude = 2, NO2_Concentration = 1)
assign("July_Oct2018.sat", July_Oct2018.sat, .GlobalEnv)
} #Final satellite data to use for comparison to EPA data.
continental.US.fxn("US_ground_NO2/US_NO2_2018-07-01_start.tif")
#Selecting just latitude and longitude from data frame just made to use with Pargasite retrieval.
US.lat.lon <- July_Oct2018.sat %>% select(c(Latitude, Longitude))
# write.csv(US.lat.lon, file.path("US_ground_NO2", "US_lat_lon.csv"), row.names = FALSE) #Used to gather EPA data from Pargasite.
#Reading in EPA data
#Pargasite: only overlapping time frame is July 2018 - October 2018 (Pargasite stops working in November 2018)
July_Oct2018.ground <- read.csv(file = './US_ground_NO2/July_Oct2018NO2.csv') %>% select(c("Latitude", "Longitude", "no2_estimate")) %>% rename(NO2_Concentration = no2_estimate)
July_Oct2018.ground <- data.frame(July_Oct2018.ground) #Final EPA data to use in comparison to satellite data.
#EPA data frame changed to raster format.
July_Oct2018.ground.r <- July_Oct2018.ground %>% select(c("Longitude", "Latitude", "NO2_Concentration")) %>% rasterFromXYZ() #Resampling raster data to be same size for comparisons.
July_Oct2018.sat.r.re <- July_Oct2018.sat.r %>% resample(July_Oct2018.ground.r)
#Perform correlation test between satellite and ground rasters.
cor(values(July_Oct2018.sat.r.re),
values(July_Oct2018.ground.r),
use = "na.or.complete")## [1] 0.03616317
#Linear model estimation.
lm1 <- lm(values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
summary(lm1)##
## Call:
## lm(formula = values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.184e-05 -5.636e-06 -7.730e-07 3.633e-06 4.478e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.152e-05 8.089e-07 26.603 <2e-16 ***
## values(July_Oct2018.ground.r) 1.211e-07 1.173e-07 1.032 0.302
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.48e-06 on 813 degrees of freedom
## (417 observations deleted due to missingness)
## Multiple R-squared: 0.001308, Adjusted R-squared: 7.937e-05
## F-statistic: 1.065 on 1 and 813 DF, p-value: 0.3025
#Local correlation estimates--shows where locally on map specific regions are positively or negatively correlated. (Getting an error: try to replicate code from this website https://statnmap.com/2018-01-27-spatial-correlation-between-rasters/)ggplot() +
geom_raster(data = July_Oct2018.sat, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("NO2 US Satellite Data, July-Oct 2018") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()ggplot() +
geom_raster(data = July_Oct2018.ground, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("NO2 US EPA Ground Data, July-Oct 2018") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()#No success in retrieving data so far due to lack of shapefiles. #Unsuccessful without ArcGIS access.
london.shp <- st_read("six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp") # Read shapefile as an sf object## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB 1936 / British National Grid
class(london.shp)## [1] "sf" "data.frame"
london.shp <- st_geometry(london.shp)
plot(london.shp)seattle.shp <- st_read("six_cities_shapefiles/seattle/WA.shp") # Read shapefile as an sf object## Reading layer `WA' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/seattle/WA.shp' using driver `ESRI Shapefile'
## Simple feature collection with 39 features and 48 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.7314 ymin: 45.54325 xmax: -116.9182 ymax: 49
## CRS: NA
class(seattle.shp)## [1] "sf" "data.frame"
seattle.shp <- st_geometry(seattle.shp)
plot(seattle.shp)#Unsuccessful: cannot find shapefiles for the selected cities without ArcGIS access.
#Read in world AQI data for 6 cities. Dates range 12/30/19 to 4/5/20.
waqi.main <- read.csv(file = 'waqi_NO2.csv', header = TRUE)
seattle.df <- filter(waqi.main, City == "Seattle")
seattle.df$Date <- as.Date(seattle.df$Date, format = "%m/%d/%y")
seattle.df <- arrange(seattle.df, Date)
wuhan.df <- filter(waqi.main, City == "Wuhan")
wuhan.df$Date <- as.Date(wuhan.df$Date, format = "%m/%d/%y")
wuhan.df <- arrange(wuhan.df, Date)
milan.df <- filter(waqi.main, City == "Milan")
milan.df$Date <- as.Date(milan.df$Date, format = "%m/%d/%y")
milan.df <- arrange(milan.df, Date)
london.df <- filter(waqi.main, City == "London")
london.df$Date <- as.Date(london.df$Date, format = "%m/%d/%y")
london.df <- arrange(london.df, Date)
saopaulo.df <- filter(waqi.main, City == "Sao Paulo")
saopaulo.df$Date <- as.Date(saopaulo.df$Date, format = "%m/%d/%y")
saopaulo.df <- arrange(saopaulo.df, Date)
johannesburg.df <- filter(waqi.main, City == "Johannesburg")
johannesburg.df$Date <- as.Date(johannesburg.df$Date, format = "%m/%d/%y")
johannesburg.df <- arrange(johannesburg.df, Date)
dates.2020.Q1 <- seq(as.Date("2019-12-30"), as.Date("2020-04-05"), by=7) #List of dates starting Dec 30th 2019.
#Averaging NO2 data by the week.
#Seattle
seattle.ground.fxn <- function(x) {
date.filter <- subset(seattle.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
seattle.avg.df <- data.frame(mean)
names(seattle.avg.df)[1] <- "Mean_NO2_Concentration"
Week_start_date <- as.Date(dates.2020.Q1[[x]])
Type <- rep("satellite", length.out = nrow(seattle.avg.df))
seattle.ground.weekly <- cbind(Week_start_date, Type, seattle.avg.df)
seattle.ground.weekly <- seattle.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
seattle.ground.weekly.df <- purrr::map_dfr(x, seattle.ground.fxn)
#Wuhan
wuhan.ground.NO2.fxn <- function(x) {
date.filter <- subset(wuhan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
wuhan.avg.df <- data.frame(date.start, wuhan.df[1,3], mean)
names(wuhan.avg.df)[1] <- "Week_start_date"
names(wuhan.avg.df)[2] <- "City"
names(wuhan.avg.df)[3] <- "Mean_of_Median_NO2"
output <- wuhan.avg.df
return(output)
}
wuhan.ground.df <- purrr::map_dfr(x, wuhan.ground.NO2.fxn)
#Milan
milan.ground.NO2.fxn <- function(x) {
date.filter <- subset(milan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
milan.avg.df <- data.frame(date.start, milan.df[1,3], mean)
names(milan.avg.df)[1] <- "Week_start_date"
names(milan.avg.df)[2] <- "City"
names(milan.avg.df)[3] <- "Mean_of_Median_NO2"
output <- milan.avg.df
return(output)
}
milan.ground.df <- purrr::map_dfr(x, milan.ground.NO2.fxn)
#London
london.ground.fxn <- function(x) {
date.filter <- subset(london.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
london.avg.df <- data.frame(mean)
names(london.avg.df)[1] <- "Mean_NO2_Concentration"
Week_start_date <- as.Date(dates.2020.Q1[[x]])
Type <- rep("satellite", length.out = nrow(london.avg.df))
london.ground.weekly <- cbind(Week_start_date, Type, london.avg.df)
london.ground.weekly <- london.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
london.ground.weekly <- purrr::map_dfr(x, london.ground.fxn)
#Sao Paulo
saopaulo.ground.NO2.fxn <- function(x) {
date.filter <- subset(saopaulo.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
saopaulo.avg.df <- data.frame(date.start, saopaulo.df[1,3], mean)
names(saopaulo.avg.df)[1] <- "Week_start_date"
names(saopaulo.avg.df)[2] <- "City"
names(saopaulo.avg.df)[3] <- "Mean_of_Median_NO2"
output <- saopaulo.avg.df
return(output)
}
saopaulo.ground.df <- purrr::map_dfr(x, saopaulo.ground.NO2.fxn)
#Jonannesburg
jonannesburg.ground.NO2.fxn <- function(x) {
date.filter <- subset(johannesburg.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
johannesburg.avg.df <- data.frame(date.start, johannesburg.df[1,3], mean)
names(johannesburg.avg.df)[1] <- "Week_start_date"
names(johannesburg.avg.df)[2] <- "City"
names(johannesburg.avg.df)[3] <- "Mean_of_Median_NO2"
output <- johannesburg.avg.df
return(output)
}
jonannesburg.ground.df <- purrr::map_dfr(x, jonannesburg.ground.NO2.fxn)
#Retrieving satellite data:
#Used code:
#gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/six_cities_ground_NO2
# in terminal to copy all files from google cloud storage into folders in working directory.
#Plan:
#get shapefiles for each city
#use shapefiles to filter satellite data for each city and make data frame equivalent to ground data per week
#make graphs for each comparing the two data sets
#Creates list of raster files from folder to call in function.
seattle.rastlist <- list.files(path = "six_cities_ground_NO2/seattle", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Function to make data frame for weekly average NO2 concentration.
seattle.sat.fxn <- function(x, y) {
r <- raster(paste0("six_cities_ground_NO2/seattle/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, `seattle.shp`)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(`seattle.shp`@data, r.mean)
Week_start_date <- as.Date(dates.2020.Q1[[y]])
Type <- rep(satellite, length.out = nrow(final.df))
seattle.sat.weekly <- cbind(final.df, Date, Week, Type)
seattle.sat.weekly <- seattle.sat.weekly %>% dplyr::mutate(Date = as.factor(strftime(as.Date(Date, format="%Y-%m-%d"), format="%m-%d")))
names(seattle.sat.weekly)[11] <- "Mean_NO2_Concentration"
seattle.sat.weekly <- seattle.sat.weekly %>% select(Type, Date, Mean_NO2_Concentration)
}
x <- seattle.rastlist
y <- 1:length(dates.2020.Q1)
seattle.sat.weekly.df <- purrr::map2_dfr(x, y, seattle.sat.fxn)
seattle.weekly.df <- rbind(seattle.sat.weekly.df, seattle.ground.weekly.df)
london.rastlist <- list.files(path = "six_cities_ground_NO2/london", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
london.sat.fxn <- function(x, y) {
r <- raster(paste0("six_cities_ground_NO2/london/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, `london.shp`)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(r.mean)
Week_start_date <- as.Date(dates.2020.Q1[[y]])
Type <- rep("satellite", length.out = nrow(final.df))
london.sat.weekly <- cbind(final.df, Week_start_date, Type)
names(london.sat.weekly)[1] <- "Mean_NO2_Concentration"
london.sat.weekly <- london.sat.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- london.rastlist
y <- 1:length(dates.2020.Q1)
london.sat.weekly <- purrr::map2_dfr(x, y, london.sat.fxn)
london.weekly.df <- rbind(london.sat.weekly,london.ground.weekly.df) time.series.theme <- function() {
theme(legend.key.size = unit(1, 'cm'),
plot.title = element_text(size=22),
legend.position="right",
legend.title = element_text(size=14),
legend.text = element_text(size=10),
axis.title=element_text(size = 10),
axis.text.y=element_text(size=10),
axis.text.x=element_text(size=10,angle=90, hjust=1))
}
#Cases
ggplot(mean.newcases.df, aes(x = Date, y = Mean_Weekly_Cases)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
ggtitle("World Weekly Averaged COVID Cases") +
ylab("Number of Cases") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2020-01-20",format="%Y-%m-%d"),as.Date("2021-07-04",format="%Y-%m-%d"))) +
ylim(0, 840000) +
#Pre-lockdown
geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=mean.newcases.df,aes(x=as.Date("2020-01-08"),y=8.35E5),label="Pre-Lockdown",color = "black", size = 3) +
geom_vline(data=mean.newcases.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=mean.newcases.df,aes(x=as.Date("2020-04-01"),y=8.35E5),label="Lockdown 1", color = "black", size = 3) +
geom_vline(data=mean.newcases.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2021-07-04",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=mean.newcases.df,aes(x=as.Date("2020-09-15"),y=8.35E5),label="Re-Opening", color = "black", size = 3) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("covid_cases_timeseries.pdf")#Deaths
ggplot(mean.newdeaths.df , aes(x = Date, y = Mean_Weekly_Deaths)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
ggtitle("World Weekly Averaged COVID Deaths") +
ylab("Number of Deaths") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2020-01-20",format="%Y-%m-%d"),as.Date("2021-07-04",format="%Y-%m-%d"))) +
ylim(0, 1.5E4) +
#Pre-lockdown
geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-01-08"),y=1.475E4),label="Pre-Lockdown",color = "black", size = 3) +
geom_vline(data=mean.newdeaths.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-04-01"),y=1.475E4),label="Lockdown 1", color = "black", size = 3) +
geom_vline(data=mean.newdeaths.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2021-07-04",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-09-15"),y=1.475E4),label="Re-Opening", color = "black", size = 3) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("covid_deaths_timeseries.pdf")#Used code:
#gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
#Creates list of raster files from folder to call in function.
rastlist.2019.left <- list.files(path = "time_analysis_2019_left", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
rastlist.2019.right <- list.files(path = "time_analysis_2019_right", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
raster.list.fxn <- function(x1, x2) {
left <- raster(paste0("time_analysis_2019_left/", x1)) %>%
flip(direction='y')
right <- raster(paste0("time_analysis_2019_right/", x2)) %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x) #Combines left and right halves of world raster.
world.NO2.r
}
x1 <- as.list(rastlist.2019.left)
x2 <- as.list(rastlist.2019.right)
urban.2019.weekly.list <- purrr::map2(x1, x2, raster.list.fxn)
raster.extract.fxn <- function(r, y) {
r.vals <- exact_extract(r, world.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
urban.2019.weekly <- cbind(final.df, Period, Date, Week)
urban.2019.weekly <- urban.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
r <- urban.2019.weekly.list
y <- as.list(1:length(dates.2019))
urban.2019.weekly.df <- purrr::map2_dfr(r, y, raster.extract.fxn)
#2020 data.
#Creates list of raster files from folder to call in function.
rastlist.2020.left <- list.files(path = "time_analysis_2020_left", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
rastlist.2020.right <- list.files(path = "time_analysis_2020_right", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
raster.list.fxn <- function(x1, x2) {
left <- raster(paste0("time_analysis_2020_left/", x1)) %>%
flip(direction='y')
right <- raster(paste0("time_analysis_2020_right/", x2)) %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x) #Combines left and right halves of world raster.
world.NO2.r
}
x1 <- as.list(rastlist.2020.left)
x2 <- as.list(rastlist.2020.right)
urban.2020.weekly.list <- purrr::map2(x1, x2, raster.list.fxn)
raster.extract.fxn <- function(r, y) {
r.vals <- exact_extract(r, world.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
urban.2020.weekly <- cbind(final.df, Period, Date, Week)
urban.2020.weekly <- urban.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
r <- urban.2020.weekly.list
y <- as.list(1:length(dates.2020))
urban.2020.weekly.df <- purrr::map2_dfr(r, y, raster.extract.fxn)
urban.weekly.df <- rbind(urban.2019.weekly.df, urban.2020.weekly.df)
urban.weekly.df$Date <- as.Date(urban.weekly.df$Date)
urban.weekly.df$Period <- as.factor(urban.weekly.df$Period)
write.csv(urban.weekly.df, file = 'time_series_csv/urban.weekly.NO2.csv')
rm(urban.weekly.df)urban.weekly.df <- read.csv(file = 'time_series_csv/urban.weekly.NO2.csv')
urban.weekly.df$Date <- as.Date(urban.weekly.df$Date)
urban.weekly.df$Period <- as.factor(urban.weekly.df$Period)
urban.plot <- ggplot(urban.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
labs(fill = "Year of Measured Pollution") +
ggtitle("Urban Areas NO2 in 2019 vs 2020") +
ylab("NO2 Concentration \n(mol/m^2)") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 3.3E-5) +
#Pre-lockdown
geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=urban.weekly.df,aes(x=as.Date("2020-01-08"),y=3.275E-5),label="Pre-Lockdown",color = "black", size = 1.5) +
geom_vline(data=urban.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=urban.weekly.df,aes(x=as.Date("2020-04-01"),y=3.275E-5),label="Lockdown 1", color = "black", size = 1.5) +
geom_vline(data=urban.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=urban.weekly.df,aes(x=as.Date("2020-09-15"),y=3.275E-5),label="Re-Opening", color = "black", size = 1.5) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("urban_time_series.pdf")covid.weekly.df <- mean.newcases.df %>% inner_join(mean.newdeaths.df, by = c("Date", "Week"))
total.weekly.df <- urban.weekly.df %>% left_join(covid.weekly.df, by = c("Date", "Week"))
#Cases
mean.newcases.plot <- ggplot(total.weekly.df, aes(x = Date, y = Mean_Weekly_Cases, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
ggtitle("World Weekly Averaged COVID Cases") +
ylab("Number of Cases") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 840000) +
#Pre-lockdown
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-01-08"),y=8.35E5),label="Pre-Lockdown",color = "black", size = 1.5) +
geom_vline(data=total.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-04-01"),y=8.35E5),label="Lockdown 1", color = "black", size = 1.5) +
geom_vline(data=total.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-09-15"),y=8.35E5),label="Re-Opening", color = "black", size = 1.5) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black"))
#Deaths
mean.newdeaths.plot <- ggplot(total.weekly.df , aes(x = Date, y = Mean_Weekly_Deaths, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
ggtitle("World Weekly Averaged COVID Deaths") +
ylab("Number of Deaths") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 1.5E4) +
#Pre-lockdown
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-01-08"),y=1.475E4),label="Pre-Lockdown",color = "black", size = 1.5) +
geom_vline(data=total.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-04-01"),y=1.475E4),label="Lockdown 1", color = "black", size = 1.5) +
geom_vline(data=total.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=total.weekly.df,aes(x=as.Date("2020-09-15"),y=1.475E4),label="Re-Opening", color = "black", size = 1.5) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black"))
NO2.vs.cases <- plot_grid(urban.plot, mean.newcases.plot, labels = c('A', 'B'), ncol = 1, align = "v")
ggsave("NO2_vs_cases.pdf", NO2.vs.cases)
NO2.vs.cases NO2.vs.deaths <- plot_grid(urban.plot, mean.newdeaths.plot, labels = c('A', 'B'), ncol = 1, align = "v")
ggsave("NO2_vs_deaths.pdf", NO2.vs.deaths)
NO2.vs.deaths us.urban.sf <- us.urban.sf %>% filter(UATYP10=="U")
#2019 data.
#Creates list of raster files from folder to call in function.
us.rastlist.2019 <- list.files(path = "US_time_analysis_2019", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2019/", x)) %>%
flip(direction='y')
r.vals <- exact_extract(r, us.urban.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
us.urban.2019.weekly <- cbind(final.df, Period, Date, Week)
us.urban.2019.weekly <- us.urban.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- as.list(us.rastlist.2019)
y <- as.list(1:length(dates.2019))
us.urban.2019.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)
#2020 data.
#Creates list of raster files from folder to call in function.
us.rastlist.2020 <- list.files(path = "US_time_analysis_2020", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2020/", x)) %>%
flip(direction='y')
r.vals <- exact_extract(r, us.urban.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
us.urban.2020.weekly <- cbind(final.df, Period, Date, Week)
us.urban.2020.weekly <- us.urban.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- as.list(us.rastlist.2020)
y <- as.list(1:length(dates.2020))
us.urban.2020.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)
us.urban.weekly.df <- rbind(us.urban.2019.weekly.df, us.urban.2020.weekly.df)
us.urban.weekly.df$Date <- as.Date(us.urban.weekly.df$Date)
us.urban.weekly.df$Period <- as.factor(us.urban.weekly.df$Period)
write.csv(us.urban.weekly.df, file = 'time_series_csv/us.urban.weekly.NO2.csv')
rm(us.urban.weekly.df)us.urban.weekly.df <- read.csv(file = 'time_series_csv/us.urban.weekly.NO2.csv')
us.urban.weekly.df$Date <- as.Date(us.urban.weekly.df$Date)
us.urban.weekly.df$Period <- as.factor(us.urban.weekly.df$Period)
us.urban.plot <- ggplot(us.urban.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
labs(fill = "Year of Measured Pollution") +
ggtitle("US Urban NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 4.6E-5) +
#Pre-lockdown
geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-02-07"),y=4.575E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=us.urban.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-05-01"),y=4.575E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=us.urban.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-08-15"),y=4.575E-5),label="Re-Opening", color = "black", size = 2) +
geom_vline(data=us.urban.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 2
geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-12-10"),y=4.575E-5),label="Lockdown 2", color = "black", size = 2) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("US_urban_time_series.pdf")us.urban.sp <- as(us.urban.sf, Class = "Spatial")
us.rural.sp <- gDifference(wrld_simpl[wrld_simpl$NAME == "United States", ], us.urban.sp)
us.rural.sf <- st_as_sf(us.rural.sp)
#2019 data.
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2019/", x)) %>%
flip(direction='y')
r.vals <- exact_extract(r, us.rural.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
us.rural.2019.weekly <- cbind(final.df, Period, Date, Week)
us.rural.2019.weekly <- us.rural.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- as.list(us.rastlist.2019)
y <- as.list(1:length(dates.2019))
us.rural.2019.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)
#2020 data.
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2020/", x)) %>%
flip(direction='y')
r.vals <- exact_extract(r, us.rural.sf, weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame("NO2_Concentration" = r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
us.rural.2020.weekly <- cbind(final.df, Period, Date, Week)
us.rural.2020.weekly <- us.rural.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- as.list(us.rastlist.2020)
y <- as.list(1:length(dates.2020))
us.rural.2020.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)
us.rural.weekly.df <- rbind(us.rural.2019.weekly.df, us.rural.2020.weekly.df)
us.rural.weekly.df$Date <- as.Date(us.rural.weekly.df$Date)
us.rural.weekly.df$Period <- as.factor(us.rural.weekly.df$Period)
write.csv(us.rural.weekly.df, file = 'time_series_csv/us.rural.weekly.NO2.csv')
rm(us.rural.weekly.df)us.rural.weekly.df <- read.csv(file = 'time_series_csv/us.rural.weekly.NO2.csv')
us.rural.weekly.df$Date <- as.Date(us.rural.weekly.df$Date)
us.rural.weekly.df$Period <- as.factor(us.rural.weekly.df$Period)
us.rural.plot <- ggplot(us.rural.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
labs(fill = "Year of Measured Pollution") +
ggtitle("US Rural NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 4.6E-5) +
#Pre-lockdown
geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-02-07"),y=4.575E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=us.rural.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-05-01"),y=4.575E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=us.rural.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-08-15"),y=4.575E-5),label="Re-Opening", color = "black", size = 2) +
geom_vline(data=us.rural.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 2
geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-12-10"),y=4.575E-5),label="Lockdown 2", color = "black", size = 2) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("US_rural_time_series.pdf")
us.rural.vs.urban <- plot_grid(us.urban.plot, us.rural.plot, labels = c('A', 'B'), ncol = 1)
ggsave("US_rural_vs_urban.pdf", us.rural.vs.urban)
us.rural.vs.urban #Use function in other NO2_Exploratory Analysis to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.
#Used code:
# gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/US_time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
us.rastlist.2019 <- list.files(path = "US_time_analysis_2019", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2019/", x)) %>% flip(direction='y')
r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ], weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
US.2019.weekly <- cbind(final.df, Date, Week, Period)
rm(r.mean, r.vals)
names(US.2019.weekly)[12] <- "NO2_Concentration"
US.2019.weekly <- US.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- us.rastlist.2019
y <- 1:length(dates.2019)
US.2019.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn)
#2020 data.
us.rastlist.2020 <- list.files(path = "US_time_analysis_2020", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2020/", x)) %>% flip(direction='y')
r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ], weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
US.2020.weekly <- cbind(final.df, Date, Week, Period) # Add new column to data
rm(r.mean, r.vals)
names(US.2020.weekly)[12] <- "NO2_Concentration"
US.2020.weekly <- US.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- us.rastlist.2020
y <- 1:length(dates.2020)
US.2020.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn)
US.weekly.df <- rbind(US.2019.weekly.df, US.2020.weekly.df)
US.weekly.df$Date <- as.Date(US.weekly.df$Date)
US.weekly.df$Period <- as.factor(US.weekly.df$Period)
write.csv(US.weekly.df, file = 'time_series_csv/us.weekly.NO2.csv')
rm(US.weekly.df)us.weekly.df <- read.csv(file = 'time_series_csv/us.weekly.NO2.csv')
us.weekly.df$Date <- as.Date(us.weekly.df$Date)
us.weekly.df$Period <- as.factor(us.weekly.df$Period)
ggplot(us.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme_bw() +
time.series.theme() +
labs(fill = "Year of Measured Pollution") +
ggtitle("US NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 3E-5) +
#Pre-lockdown
geom_rect(data=us.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=2.95E-5,ymax=3E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=us.weekly.df,aes(x=as.Date("2020-02-07"),y=2.975E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=us.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=2.95E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.weekly.df,aes(x=as.Date("2020-05-01"),y=2.975E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=us.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=2.95E-5,ymax=3E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=us.weekly.df,aes(x=as.Date("2020-08-15"),y=2.975E-5),label="Re-Opening", color = "black", size = 2) +
geom_vline(data=us.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 2
geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=2.95E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=us.weekly.df,aes(x=as.Date("2020-12-10"),y=2.975E-5),label="Lockdown 2", color = "black", size = 2) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("US_time_series.pdf")#Use function in other NO2_Exploratory Analysis file to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.
#Used code:
# gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/China_time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
#Creates list of raster files from folder to call in function.
china.rastlist.2019 <- list.files(path = "China_time_analysis_2019", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-11-05"), as.Date("2019-11-10"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("China_time_analysis_2019/", x)) %>% flip(direction='y')
r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ], weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep("2018-2019", length.out = nrow(final.df))
China.2019.weekly <- cbind(final.df, Date, Week, Period)
rm(r.mean, r.vals)
names(China.2019.weekly)[12] <- "NO2_Concentration"
China.2019.weekly <- China.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- china.rastlist.2019
y <- 1:length(dates.2019)
China.2019.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn)
#2020 data.
#Creates list of raster files from folder to call in function.
china.rastlist.2020 <- list.files(path = "China_time_analysis_2020", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-11-04"), as.Date("2020-11-08"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
raster.extract.fxn <- function(x, y) {
r <- raster(paste0("China_time_analysis_2020/", x)) %>% flip(direction='y')
r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ], weights = 'area', 'mean')
r.mean <- mean(r.vals, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep("2019-2020", length.out = nrow(final.df))
China.2020.weekly <- cbind(final.df, Date, Week, Period) # Add new column to data
rm(r.mean, r.vals)
names(China.2020.weekly)[12] <- "NO2_Concentration"
China.2020.weekly <- China.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- china.rastlist.2020
y <- 1:length(dates.2020)
China.2020.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn)
China.weekly.df <- rbind(China.2019.weekly.df, China.2020.weekly.df)
China.weekly.df$Date <- as.Date(China.weekly.df$Date)
China.weekly.df$Period <- as.factor(China.weekly.df$Period)
max(China.weekly.df$NO2_Concentration)
write.csv(China.weekly.df, file = 'time_series_csv/china.weekly.NO2.csv')
rm(China.weekly.df)china.weekly.df <- read.csv(file = 'time_series_csv/china.weekly.NO2.csv')
china.weekly.df$Date <- as.Date(china.weekly.df$Date)
china.weekly.df$Period <- as.factor(china.weekly.df$Period)
ggplot(china.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
labs(fill = "Year of Measured Pollution") +
ggtitle("China NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b-%Y",date_breaks = "1 month",limits=c(as.Date("2019-11-01",format="%Y-%m-%d"),as.Date("2020-11-10",format="%Y-%m-%d"))) +
ylim(0,6E-5) +
theme_bw() +
time.series.theme() +
#Pre-lockdown
geom_rect(data=china.weekly.df,aes(xmin=as.Date("2019-11-01",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=5.9E-5,ymax=6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=china.weekly.df,aes(x=as.Date("2019-12-15"),y=5.95E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=china.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=china.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-04-08",format="%Y-%m-%d"),ymin=5.9E-5,ymax=6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=china.weekly.df,aes(x=as.Date("2020-03-01"),y=5.95E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=china.weekly.df,xintercept=as.Date("2020-04-08",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=china.weekly.df,aes(xmin=as.Date("2020-04-08",format="%Y-%m-%d"),xmax=as.Date("2020-11-08",format="%Y-%m-%d"), ymin=5.9E-5,ymax=6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=china.weekly.df,aes(x=as.Date("2020-08-01"),y=5.95E-5),label="Re-Opening", color = "black", size = 2) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("China_time_series.pdf")#Converting before lockdown and after lockdown rasters for US to data frames.
US.NO2.change.fxn <- function(file) {
r <- raster(paste0('./US_NO2_comparison/', file)) %>%
flip(direction='y')
df <- r %>% crop(., extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ])) %>%
mask(., wrld_simpl[wrld_simpl@data$NAME=="United States", ]) %>%
rasterToPoints(spatial = TRUE) %>%
data.frame() %>%
select(NO2_Concentration = 1, Longitude = 2, Latitude = 3)
df
}
files <- c("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif", "US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif", "US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")
US.NO2.list <- purrr::map(files, US.NO2.change.fxn)
#Plotting before and after maps.
US.NO2.change.map.fxn <- function(x, y, t1, t2) {
p1 <- ggplot() +
geom_raster(data = x, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle(paste("US NO2 Concentrations (Sentinel Satellite)", t1)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100), limits=c(0, 10E-5)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()
p2 <- ggplot() +
geom_raster(data = y, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle(paste("US NO2 Concentrations (Sentinel Satellite)", t2)) +
scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100), limits=c(0, 10E-5)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()
plot_grid(p1, p2, labels = "AUTO", ncol = 1)
}
US.NO2.change.map.fxn(data.frame(US.NO2.list[1]), data.frame(US.NO2.list[2]), "Jan-March 2019", "March-May 2019")US.NO2.change.map.fxn(data.frame(US.NO2.list[3]), data.frame(US.NO2.list[4]), "Jan-March 2020", "March-May 2020")US.NO2.change.map.fxn(data.frame(US.NO2.list[5]), data.frame(US.NO2.list[6]), "Jan-March 2021", "March-May 2021")#Function for finding medians over each time period.
US_NO2_median <- function(x) {
r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
lapply(r.vals, FUN=median, na.rm = TRUE)
}
US_NO2_median("US_NO2_2020-01-19_start.tif")## [[1]]
## [1] 1.965982e-05
US_NO2_median("US_NO2_2020-03-19_start.tif")## [[1]]
## [1] 1.799675e-05
#Function for finding averages over each time period.
US_NO2_average <- function(x) {
r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
lapply(r.vals, FUN=mean, na.rm = TRUE)
}
US_NO2_average("US_NO2_2020-01-19_start.tif")## [[1]]
## [1] 2.182116e-05
US_NO2_average("US_NO2_2020-03-19_start.tif")## [[1]]
## [1] 1.934275e-05
#2019 violin plot
US_2019_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2019_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")#2020 violin plot
US_2020_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2020", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2 Before and After COVID-19') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2020_violin("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")#2021 violin plot
US_2021_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2021", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2021 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2021_violin("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")#All violin plots in 1 graph:
US_violin <- function(a, b, c, d, e, f) {
r1 <- raster(paste0("./US_NO2_Comparison/", (a))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (b))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
r3 <- raster(paste0("./US_NO2_Comparison/", (c))) %>% flip(direction='y')
r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r3, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r3 <- r3 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r3)) %>% as.data.frame()
r3 <- cbind(vec, r3)
names(r3)[1] <- "Timeframe"
names(r3)[4] <- "NO2_Concentration"
r4 <- raster(paste0("./US_NO2_Comparison/", (d))) %>% flip(direction='y')
r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r4, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r4 <- r4 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2020", length.out = nrow(r4)) %>% as.data.frame()
r4 <- cbind(vec, r4)
names(r4)[1] <- "Timeframe"
names(r4)[4] <- "NO2_Concentration"
r5 <- raster(paste0("./US_NO2_Comparison/", (e))) %>% flip(direction='y')
r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r5, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r5 <- r5 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r5)) %>% as.data.frame()
r5 <- cbind(vec, r5)
names(r5)[1] <- "Timeframe"
names(r5)[4] <- "NO2_Concentration"
r6 <- raster(paste0("./US_NO2_Comparison/", (f))) %>% flip(direction='y')
r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r6, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r6 <- r6 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2021", length.out = nrow(r6)) %>% as.data.frame()
r6 <- cbind(vec, r6)
names(r6)[1] <- "Timeframe"
names(r6)[4] <- "NO2_Concentration"
df <- rbind(r1, r2, r3, r4, r5, r6)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_discrete(limits=c("Jan-March 2019", "March-May 2019", "Jan-March 2020", "March-May 2020", "Jan-March 2021", "March-May 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif", "US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif", "US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")US_2019_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2: 2019 Seasonal Change',
ylab='Date',
names = c('2019 Jan-Mar', '2019 Mar-May'))
}
US_2019_boxplot("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")US_2020_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2 Before and After COVID-19',
ylab='Date',
names = c('2020 Jan-Mar', '2020 Mar-May'))
}
US_2020_boxplot("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")US_2021_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2: 2021 Seasonal Change',
ylab='Date',
names = c('2021 Jan-Mar', '2021 Mar-May'))
}
US_2021_boxplot("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")#Converting before lockdown and after lockdown rasters for China to data frames.
China.NO2.change.fxn <- function(file) {
r <- raster(paste0('./China_NO2_comparison/', file)) %>%
flip(direction='y')
df <- r %>% crop(., extent(wrld_simpl[wrld_simpl@data$NAME=="China", ])) %>%
mask(., wrld_simpl[wrld_simpl@data$NAME=="China", ]) %>%
rasterToPoints(spatial = TRUE) %>%
data.frame() %>%
select(NO2_Concentration = 1, Longitude = 2, Latitude = 3)
df
}
files <- c("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif", "China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif", "China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")
China.NO2.list <- purrr::map(files, China.NO2.change.fxn)
#Plotting before and after maps.
China.NO2.change.map.fxn <- function(x, y, t1, t2) {
p1 <- ggplot() +
geom_raster(data = x, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle(paste("China NO2 Concentrations (Sentinel Satellite)", t1)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100), limits=c(0, 5.5E-4)) +
north(x.min = 73, y.min = 18, x.max = 135, y.max = 54, anchor = c(x=84, y=28), symbol=12) +
scalebar(x.min = 73, y.min = 18, x.max = 135, y.max = 54, dist = 250, dist_unit = 'km', transform = TRUE, anchor = c(x=84, y=22.5), st.size = 1.5, height = 0.02, model = 'WGS84') +
xlim(73, 135) +
ylim(18, 54) +
coord_quickmap()
p2 <- ggplot() +
geom_raster(data = y, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle(paste("China NO2 Concentrations (Sentinel Satellite)", t2)) +
scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100), limits=c(0, 5.5E-4)) +
north(x.min = 73, y.min = 18, x.max = 135, y.max = 54, anchor = c(x=84, y=28), symbol=12) +
scalebar(x.min = 73, y.min = 18, x.max = 135, y.max = 54, dist = 250, dist_unit = 'km', transform = TRUE, anchor = c(x=84, y=22.5), st.size = 1.5, height = 0.02, model = 'WGS84') +
xlim(73, 135) +
ylim(18, 54) +
coord_quickmap()
plot_grid(p1, p2, labels = "AUTO", ncol = 1)
}
China.NO2.change.map.fxn(data.frame(China.NO2.list[1]), data.frame(China.NO2.list[2]), "Nov-Jan 2019", "Jan-March 2019")China.NO2.change.map.fxn(data.frame(China.NO2.list[3]), data.frame(China.NO2.list[4]), "Nov-Jan 2020", "Jan-March 2020")China.NO2.change.map.fxn(data.frame(China.NO2.list[5]), data.frame(China.NO2.list[6]), "Nov-Jan 2021", "Jan-March 2021")#Function for finding medians over each time period.
China_NO2_median <- function(x) {
r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
lapply(r.vals, FUN=median, na.rm = TRUE)
}
China_NO2_median("China_NO2_2019-11-23_start.tif")## [[1]]
## [1] 1.576366e-05
China_NO2_median("China_NO2_2020-01-23_start.tif")## [[1]]
## [1] 1.231765e-05
#Function for finding averages over each time period.
China_NO2_average <- function(x) {
r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
lapply(r.vals, FUN=mean, na.rm = TRUE)
}
China_NO2_average("China_NO2_2019-11-23_start.tif")## [[1]]
## [1] 4.035193e-05
China_NO2_average("China_NO2_2020-01-23_start.tif") ## [[1]]
## [1] 2.19273e-05
#2019 violin plot
China_2019_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2019_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")#2020 violin plot
China_2020_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2019-Jan2020", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2 Before and After COVID-19') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2019-Jan2020", "Jan-March 2020")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2020_violin("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")#2021 violin plot
China_2021_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2020-Jan2021", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2021 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2020-Jan2021", "Jan-March 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2021_violin("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")#All violin plots in 1 graph:
China_violin <- function(a, b, c, d, e, f) {
r1 <- raster(paste0("./China_NO2_Comparison/", (a))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (b))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
r3 <- raster(paste0("./China_NO2_Comparison/", (c))) %>% flip(direction='y')
r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r3, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r3 <- r3 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2019-Jan2020", length.out = nrow(r3)) %>% as.data.frame()
r3 <- cbind(vec, r3)
names(r3)[1] <- "Timeframe"
names(r3)[4] <- "NO2_Concentration"
r4 <- raster(paste0("./China_NO2_Comparison/", (d))) %>% flip(direction='y')
r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r4, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r4 <- r4 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r4)) %>% as.data.frame()
r4 <- cbind(vec, r4)
names(r4)[1] <- "Timeframe"
names(r4)[4] <- "NO2_Concentration"
r5 <- raster(paste0("./China_NO2_Comparison/", (e))) %>% flip(direction='y')
r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r5, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r5 <- r5 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2020-Jan2021", length.out = nrow(r5)) %>% as.data.frame()
r5 <- cbind(vec, r5)
names(r5)[1] <- "Timeframe"
names(r5)[4] <- "NO2_Concentration"
r6 <- raster(paste0("./China_NO2_Comparison/", (f))) %>% flip(direction='y')
r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r6, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r6 <- r6 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r6)) %>% as.data.frame()
r6 <- cbind(vec, r6)
names(r6)[1] <- "Timeframe"
names(r6)[4] <- "NO2_Concentration"
df <- rbind(r1, r2, r3, r4, r5, r6)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019", "Nov2019-Jan2020", "Jan-March 2020", "Nov2020-Jan2021", "Jan-March 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif", "China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif", "China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")China_2019_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2: 2019 Seasonal Change',
ylab='Date',
names = c('Nov2018-Jan2019', 'Jan-March 2019'))
}
China_2019_change("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")China_2020_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2 Before and After COVID-19',
ylab='Date',
names = c('Nov2019-Jan2020', 'Jan-March 2020'))
}
China_2020_change("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")China_2021_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2: 2021 Seasonal Change',
ylab='Date',
names = c('Nov2020-Jan2021', 'jan-March 2021'))
}
China_2021_change("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")